package instruction.postprocessor;
00002
00003 import instruction.exceptions.UnknownWordException;
00004 import instruction.opencyc.OpenCyc;
00005 import instruction.semanticObjects.Instruction;
00006 import instruction.semanticObjects.ObjectX;
00007 import instruction.semanticObjects.Preposition;
00008 import instruction.semanticObjects.SemanticObject;
00009 import instruction.semanticObjects.Word;
00010 import instruction.wordnet.WordNetRDF2;
00011
00012 import java.util.ArrayList;
00013 import java.util.List;
00014
00015 public class BuildObjectNamesPostProcessor implements PostProcessor {
00016
00017 public void run(List<Instruction> instructions) {
00018
00019 for (int i = 0; i < instructions.size(); i++) {
00020
00021 try {
00022 buildObjectNames(instructions.get(i));
00023 } catch (Exception e) {
00024 System.out.println(e.getMessage());
00025 }
00026 }
00027 }
00028
00029 public void buildObjectNames(SemanticObject so) throws Exception {
00030
00031 if (so instanceof Instruction) {
00032 Instruction i = (Instruction) so;
00033
00034 List<ObjectX> objects = i.getObjects();
00035 for (int j = 0; j < objects.size(); j++) {
00036 try {
00037 buildObjectNames(objects.get(j));
00038
00039
00040 } catch (Exception e) {
00041 System.out.println(e.getMessage());
00042 objects.remove(j);
00043 }
00044 }
00045
00046 List<Preposition> pp = i.getPrepositions();
00047 for (int j = 0; j < pp.size(); j++)
00048 buildObjectNames(pp.get(j));
00049 }
00050
00051 else if (so instanceof ObjectX) {
00052 ObjectX o = (ObjectX) so;
00053
00054 buildObjectName(o);
00055
00056 List<Preposition> pp = o.getPrepositions();
00057 for (int j = 0; j < pp.size(); j++)
00058 buildObjectNames(pp.get(j));
00059 }
00060
00061 else if (so instanceof Preposition) {
00062 Preposition pp = (Preposition) so;
00063 List<ObjectX> o = pp.getObjects();
00064 for (int j = 0; j < o.size(); j++) {
00065 try {
00066 buildObjectNames(o.get(j));
00067 } catch (Exception e) {
00068 o.remove(j);
00069 }
00070 }
00071 }
00072 }
00073
00074 private void buildObjectName(ObjectX object) throws Exception {
00075
00076 int step = 0;
00077 int max_steps = 5;
00078
00079 List<Word> objectName = object.getName();
00080
00081 for (int i = 0; i < objectName.size(); i++) {
00082 if (objectName.get(i).getType() == Word.TYPE_PRONOUN
00083 && objectName.size() > 1)
00084 objectName.remove(i);
00085
00086 else if (objectName.get(i).getType() == Word.TYPE_PRONOUN)
00087 return;
00088 }
00089
00090 if (getNouns(objectName).isEmpty())
00091 return;
00092
00093 while (step < max_steps) {
00094
00095 switch (step++) {
00096
00097
00098
00099
00100 case 0:
00101 for (int i = 0; i < objectName.size(); i++) {
00102 if (WordNetRDF2.wordExistsAs(objectName.get(i).getLabel(),
00103 WordNetRDF2.convertPOS(objectName.get(i)
00104 .getType())))
00105 continue;
00106 else {
00107 if (WordNetRDF2.wordExistsAs(objectName.get(i)
00108 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_NOUN)))
00109 objectName.get(i).setType(Word.TYPE_NOUN);
00110 else if (WordNetRDF2.wordExistsAs(objectName.get(i)
00111 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_ADJ)))
00112 objectName.get(i).setType(Word.TYPE_ADJ);
00113 else if (WordNetRDF2.wordExistsAs(objectName.get(i)
00114 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_ADV)))
00115 objectName.get(i).setType(Word.TYPE_ADV);
00116 else if (WordNetRDF2.wordExistsAs(objectName.get(i)
00117 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_VERB_INFINITIVE)))
00118 objectName.get(i)
00119 .setType(Word.TYPE_VERB_INFINITIVE);
00120 else if (WordNetRDF2.wordExistsAs(objectName.get(i)
00121 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_PAST_PARTICIPLE)))
00122 objectName.get(i)
00123 .setType(Word.TYPE_PAST_PARTICIPLE);
00124 else if (WordNetRDF2.wordExistsAs(objectName.get(i)
00125 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_GERUND)))
00126 objectName.get(i).setType(Word.TYPE_GERUND);
00127 else
00128 throw new UnknownWordException("The Word \""
00129 + objectName.get(i).getLabel()
00130 + "\" is unknown in WordNetRDF2");
00131 }
00132 }
00133 break;
00134
00135
00136
00137
00138 case 1: {
00139 objectName = getNounCompositions(objectName);
00140
00141
00142
00143
00144
00145 if (getNouns(objectName).size() == 1)
00146 step = max_steps;
00147
00148 break;
00149 }
00150
00151
00152
00153
00154 case 2:
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174 break;
00175
00176
00177
00178
00179
00180 case 3: {
00181 for (int i = 0; i < objectName.size(); i++) {
00182 ArrayList<String> synsets = WordNetRDF2.getSynsets(
00183 objectName.get(i).getLabel(), WordNetRDF2
00184 .convertPOS(objectName.get(i).getType()));
00185 for (int j = 0; j < synsets.size(); j++) {
00186 ArrayList<String> concepts = OpenCyc.getInstance()
00187 .getCycConceptFromWordNetID(
00188 "N" + synsets.get(j));
00189 for (int k = 0; k < concepts.size(); k++) {
00190 if (OpenCyc.getInstance().isaInUnivVocMt(
00191 concepts.get(k), "UnitOfMeasureConcept")
00192 && i == 0) {
00193 System.out.println("Recognized quantifier: "
00194 + objectName.get(i) + " ("
00195 + concepts.get(k) + ")");
00196 object.getQuantifier().setMeasure(
00197 objectName.get(i));
00198 objectName.remove(i);
00199 objectName = getNounCompositions(objectName);
00200 if (objectName.size() == 1)
00201 step = max_steps;
00202
00203 break;
00204 }
00205 }
00206 }
00207 }
00208 break;
00209 }
00210
00211 case 4:
00212 break;
00213 }
00214 }
00215
00216
00217
00218
00219 for (int i = 0; i < objectName.size(); i++) {
00220 if (objectName.get(i).getType() == Word.TYPE_ADJ
00221 || objectName.get(i).getType() == Word.TYPE_PAST_PARTICIPLE) {
00222 object.getAdjectives().add(objectName.get(i));
00223 objectName.remove(i--);
00224 }
00225 }
00226
00227
00228
00229
00230 if (objectName.size() > 1) {
00231 ObjectX last = null;
00232 System.out.println("object name size " + objectName.size());
00233 for (int i = 0; i < objectName.size(); i++) {
00234 System.out.println("Building new object " + objectName.get(i));
00235 ObjectX newOb = new ObjectX();
00236 ArrayList<Word> name = new ArrayList<Word>();
00237 name.add(objectName.get(i));
00238 newOb.setName(name);
00239 if (last != null) {
00240 Preposition p = new Preposition();
00241 p.addPreposition(new Word(Word.TYPE_PREPOSITION, "of"));
00242 p.addObject(last);
00243 newOb.addPreposition(p);
00244 System.out.println("adding prp: " + p.toString());
00245 }
00246 last = newOb;
00247 }
00248 object.getPrepositions().addAll(last.getPrepositions());
00249 objectName = last.getName();
00250 }
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263 object.setName(objectName);
00264
00265 }
00266
00273 private ArrayList<Word> getNouns(List<Word> words) {
00274
00275 ArrayList<Word> nouns = new ArrayList<Word>();
00276 for (int i = 0; i < words.size(); i++)
00277 if (words.get(i).getType() == Word.TYPE_NOUN)
00278 nouns.add(words.get(i));
00279 return nouns;
00280 }
00281
00289 private List<Word> getNounCompositions(List<Word> oldList) throws Exception {
00290
00291 ArrayList<Word> nouns = getNouns(oldList);
00292 ArrayList<Word> newList = new ArrayList<Word>();
00293
00294 for (int i = 0; i < oldList.size(); i++) {
00295 if (oldList.get(i).getType() == Word.TYPE_ADJ
00296 || oldList.get(i).getType() == Word.TYPE_PAST_PARTICIPLE)
00297 newList.add(oldList.get(i));
00298 }
00299
00300 for (int i = 0; i < nouns.size(); i++) {
00301
00302 Word objectName = new Word(nouns.get(i).getType(), nouns.get(i)
00303 .getLabel());
00304 Word tmpObjName = new Word(objectName.getType(), objectName
00305 .getLabel());
00306 int j = i + 1;
00307 while (j <= nouns.size()) {
00308
00309 if (WordNetRDF2.wordExistsAs(tmpObjName.getLabel(), WordNetRDF2
00310 .convertPOS(Word.TYPE_NOUN))) {
00311
00312 objectName.setLabel(tmpObjName.getLabel());
00313 if (j < nouns.size())
00314 tmpObjName.setLabel(tmpObjName.getLabel() + " "
00315 + nouns.get(j++).getLabel());
00316 else {
00317 i = j - 1;
00318 break;
00319 }
00320 } else {
00321 i = j - 2;
00322 break;
00323 }
00324
00325 }
00326 newList.add(objectName);
00327 objectName.setSynSetIDs(WordNetRDF2.getSynsets(
00328 objectName.getLabel(), WordNetRDF2.convertPOS(objectName
00329 .getType())));
00330 }
00331
00332 return newList;
00333 }
00334 }