Go to the documentation of this file.00001 package org.ros.nlp;
00002
00003 import java.io.CharArrayReader;
00004 import java.io.InputStream;
00005 import java.io.ObjectInputStream;
00006 import java.util.Collection;
00007 import java.util.List;
00008
00009 import org.apache.commons.logging.Log;
00010 import org.ros.node.DefaultNodeFactory;
00011 import org.ros.message.MessageListener;
00012 import org.ros.node.Node;
00013 import org.ros.node.NodeConfiguration;
00014 import org.ros.node.NodeMain;
00015 import org.ros.node.topic.Publisher;
00016 import org.ros.message.stanford_parser_msgs.Dependency;
00017 import org.ros.message.stanford_parser_msgs.Parse;
00018 import org.ros.message.stanford_parser_msgs.SquashedParseTreeNode;
00019
00020 import edu.stanford.nlp.ling.HasWord;
00021 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
00022 import edu.stanford.nlp.process.Tokenizer;
00023 import edu.stanford.nlp.trees.EnglishGrammaticalStructure;
00024 import edu.stanford.nlp.trees.GrammaticalRelation;
00025 import edu.stanford.nlp.trees.PennTreebankLanguagePack;
00026 import edu.stanford.nlp.trees.Tree;
00027 import edu.stanford.nlp.trees.TreebankLanguagePack;
00028 import edu.stanford.nlp.trees.TreeGraphNode;
00029 import edu.stanford.nlp.trees.TypedDependency;
00030
00031
00032
00033 public class Parser implements NodeMain {
00034
00035
00036
00037 private LexicalizedParser lp;
00038 private Publisher<Parse> publisher;
00039
00040
00041
00042 private MessageListener<org.ros.message.std_msgs.String> text_callback =
00043 new MessageListener<org.ros.message.std_msgs.String>() {
00044 @Override
00045 public void onNewMessage(org.ros.message.std_msgs.String message) {
00046
00047
00048 String text = message.data;
00049 CharArrayReader reader = new CharArrayReader(text.toCharArray());
00050 TreebankLanguagePack tlp = new PennTreebankLanguagePack();
00051 Tokenizer<? extends HasWord> toke = tlp.getTokenizerFactory().getTokenizer(reader);
00052 List<? extends HasWord> wordList = toke.tokenize();
00053 Tree parse = null;
00054 if (lp.parse(wordList)) {
00055 parse = lp.getBestParse();
00056 }
00057
00058
00059 EnglishGrammaticalStructure egs = new EnglishGrammaticalStructure(parse);
00060
00061
00062
00063 Collection<TypedDependency> collapsed = egs.typedDependenciesCollapsedTree();
00064
00065
00066
00067 System.out.println(parse.toString());
00068 Parse p = squashParseTree(parse);
00069
00070 for(TypedDependency td : collapsed) {
00071 System.out.println(td.toString());
00072 GrammaticalRelation relation = td.reln();
00073 TreeGraphNode governor = td.gov();
00074 TreeGraphNode dependent = td.dep();
00075 Dependency dependency = new Dependency();
00076
00077
00078 String shortName = relation.getShortName();
00079 String specific = relation.getSpecific();
00080 if(specific == null) {
00081 dependency.relation = shortName;
00082 }
00083 else {
00084 dependency.relation = shortName + '_' + specific;
00085 }
00086
00087
00088 dependency.governor_index = governor.index() - 1;
00089 dependency.dependent_index = dependent.index() - 1;
00090 p.dependencies.add(dependency);
00091 }
00092
00093 publisher.publish(p);
00094 }
00095 };
00096
00097
00098
00099 private Parse squashParseTree(Tree tree) {
00100 Parse p = new Parse();
00101 squashParseTreeRecursive(tree, -1, p);
00102 return p;
00103 }
00104
00105
00106
00107 private void squashParseTreeRecursive(Tree tree, int parentNodeIndex, Parse p) {
00108
00109 int in = p.nodes.size();
00110 SquashedParseTreeNode sptn = new SquashedParseTreeNode();
00111
00112 String tag = tree.value();
00113 try {
00114 sptn.tag = lookupTagInt(tag);
00115 }
00116 catch(Exception e) {
00117
00118 int it = p.tags.size();
00119 p.tags.add(tag);
00120 sptn.tag = it;
00121 }
00122
00123 sptn.parent_id = parentNodeIndex;
00124
00125 sptn.score = tree.score();
00126
00127 boolean atWord = tree.isPreTerminal();
00128 if(atWord) {
00129 int iw = p.words.size();
00130 p.words.add(tree.firstChild().value());
00131 sptn.word_index = iw;
00132 }
00133 else {
00134 sptn.word_index = -1;
00135 }
00136 p.nodes.add(sptn);
00137
00138
00139 if(!atWord) {
00140 for(int i=0; i<tree.numChildren(); i++) {
00141 squashParseTreeRecursive(tree.getChild(i), in, p);
00142 }
00143 }
00144 }
00145
00146
00147
00148 private int lookupTagInt(String s) throws Exception {
00149
00150 String s2 = new String(s);
00151 s2.replace('-', '_').replace("$", "_S");
00152
00153
00154 return SquashedParseTreeNode.class.getField(s2).getInt(null);
00155 }
00156
00157
00158
00159 @Override
00160 public void main(NodeConfiguration configuration) {
00161 Node node = null;
00162 Log log = null;
00163 try {
00164
00165 InputStream is = Parser.class.getResourceAsStream("englishPCFG.ser");
00166 ObjectInputStream ois = new ObjectInputStream(is);
00167 lp = new LexicalizedParser(ois);
00168
00169
00170 node = new DefaultNodeFactory().newNode("stanford_parser_ros", configuration);
00171 log = node.getLog();
00172 publisher = node.newPublisher("parse", "stanford_parser_msgs/Parse");
00173 node.newSubscriber("text", "std_msgs/String", text_callback);
00174 }
00175 catch (Exception e) {
00176 if (node != null) {
00177 log.fatal(e);
00178 }
00179 e.printStackTrace();
00180 return;
00181 }
00182 }
00183
00184
00185
00186 @Override
00187 public void shutdown() {
00188 return;
00189 }
00190 }
00191