$search
00001 package instruction.postprocessor; 00002 00003 import instruction.exceptions.UnknownWordException; 00004 import instruction.opencyc.OpenCyc; 00005 import instruction.semanticObjects.Instruction; 00006 import instruction.semanticObjects.ObjectX; 00007 import instruction.semanticObjects.Preposition; 00008 import instruction.semanticObjects.SemanticObject; 00009 import instruction.semanticObjects.Word; 00010 import instruction.wordnet.WordNetRDF2; 00011 00012 import java.util.ArrayList; 00013 import java.util.List; 00014 00015 public class BuildObjectNamesPostProcessor implements PostProcessor { 00016 00017 public void run(List<Instruction> instructions) { 00018 00019 for (int i = 0; i < instructions.size(); i++) { 00020 00021 try { 00022 buildObjectNames(instructions.get(i)); 00023 } catch (Exception e) { 00024 System.out.println(e.getMessage()); 00025 } 00026 } 00027 } 00028 00029 public void buildObjectNames(SemanticObject so) throws Exception { 00030 00031 if (so instanceof Instruction) { 00032 Instruction i = (Instruction) so; 00033 00034 List<ObjectX> objects = i.getObjects(); 00035 for (int j = 0; j < objects.size(); j++) { 00036 try { 00037 buildObjectNames(objects.get(j)); 00038 // System.out.println( "OBJECT READY: " + objects.get( j ) 00039 // ); 00040 } catch (Exception e) { 00041 System.out.println(e.getMessage()); 00042 objects.remove(j); 00043 } 00044 } 00045 00046 List<Preposition> pp = i.getPrepositions(); 00047 for (int j = 0; j < pp.size(); j++) 00048 buildObjectNames(pp.get(j)); 00049 } 00050 00051 else if (so instanceof ObjectX) { 00052 ObjectX o = (ObjectX) so; 00053 00054 buildObjectName(o); 00055 00056 List<Preposition> pp = o.getPrepositions(); 00057 for (int j = 0; j < pp.size(); j++) 00058 buildObjectNames(pp.get(j)); 00059 } 00060 00061 else if (so instanceof Preposition) { 00062 Preposition pp = (Preposition) so; 00063 List<ObjectX> o = pp.getObjects(); 00064 for (int j = 0; j < o.size(); j++) { 00065 try { 00066 buildObjectNames(o.get(j)); 00067 } catch (Exception e) { 00068 o.remove(j); 00069 } 00070 } 00071 } 00072 } 00073 00074 private void buildObjectName(ObjectX object) throws Exception { 00075 00076 int step = 0; 00077 int max_steps = 5; 00078 00079 List<Word> objectName = object.getName(); 00080 00081 for (int i = 0; i < objectName.size(); i++) { 00082 if (objectName.get(i).getType() == Word.TYPE_PRONOUN 00083 && objectName.size() > 1) 00084 objectName.remove(i); 00085 00086 else if (objectName.get(i).getType() == Word.TYPE_PRONOUN) 00087 return; 00088 } 00089 00090 if (getNouns(objectName).isEmpty()) 00091 return; 00092 00093 while (step < max_steps) { 00094 00095 switch (step++) { 00096 00097 // ================================================================== 00098 // 1. Step: Check if every Word is classified correctly 00099 // ================================================================== 00100 case 0: 00101 for (int i = 0; i < objectName.size(); i++) { 00102 if (WordNetRDF2.wordExistsAs(objectName.get(i).getLabel(), 00103 WordNetRDF2.convertPOS(objectName.get(i) 00104 .getType()))) 00105 continue; 00106 else { 00107 if (WordNetRDF2.wordExistsAs(objectName.get(i) 00108 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_NOUN))) 00109 objectName.get(i).setType(Word.TYPE_NOUN); 00110 else if (WordNetRDF2.wordExistsAs(objectName.get(i) 00111 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_ADJ))) 00112 objectName.get(i).setType(Word.TYPE_ADJ); 00113 else if (WordNetRDF2.wordExistsAs(objectName.get(i) 00114 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_ADV))) 00115 objectName.get(i).setType(Word.TYPE_ADV); 00116 else if (WordNetRDF2.wordExistsAs(objectName.get(i) 00117 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_VERB_INFINITIVE))) 00118 objectName.get(i) 00119 .setType(Word.TYPE_VERB_INFINITIVE); 00120 else if (WordNetRDF2.wordExistsAs(objectName.get(i) 00121 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_PAST_PARTICIPLE))) 00122 objectName.get(i) 00123 .setType(Word.TYPE_PAST_PARTICIPLE); 00124 else if (WordNetRDF2.wordExistsAs(objectName.get(i) 00125 .getLabel(), WordNetRDF2.convertPOS(Word.TYPE_GERUND))) 00126 objectName.get(i).setType(Word.TYPE_GERUND); 00127 else 00128 throw new UnknownWordException("The Word \"" 00129 + objectName.get(i).getLabel() 00130 + "\" is unknown in WordNetRDF2"); 00131 } 00132 } 00133 break; 00134 00135 // ================================================================== 00136 // 2. Step: Find noun compositions like "stove top" or "place mat" 00137 // ================================================================== 00138 case 1: { 00139 objectName = getNounCompositions(objectName); 00140 00141 // 00142 // If there is only one object name remaining, we can stop right 00143 // here 00144 // 00145 if (getNouns(objectName).size() == 1) 00146 step = max_steps; 00147 00148 break; 00149 } 00150 00151 // ================================================================== 00152 // 3. Step: Try to convert the leading nouns into adjectives 00153 // ================================================================== 00154 case 2: 00155 // for ( int i = 0; i < objectName.size(); i++ ) { 00156 // Word w = objectName.get( i ); 00157 // if ( w.getType() == Word.TYPE_NOUN && i < objectName.size() - 00158 // 1 ) { 00159 // if ( WordNetRDF2.wordExistsAs( w.getLabel(), Word.TYPE_ADJ ) ) 00160 // { 00161 // w.setType( Word.TYPE_ADJ ); 00162 // } 00163 // } 00164 // if ( w.getType() == Word.TYPE_NOUN && i == objectName.size() 00165 // - 1 ) { 00166 // objectName = getNounCompositions( objectName ); 00167 // 00168 // if ( getNouns( objectName ).size() == 1 ) 00169 // step = max_steps; 00170 // break; 00171 // } 00172 // 00173 // } 00174 break; 00175 00176 // ================================================================== 00177 // 4. Step: If there is a Word determining a measure 00178 // (UnitOfMeasureConcept), create a Quantifier 00179 // ================================================================== 00180 case 3: { 00181 for (int i = 0; i < objectName.size(); i++) { 00182 ArrayList<String> synsets = WordNetRDF2.getSynsets( 00183 objectName.get(i).getLabel(), WordNetRDF2 00184 .convertPOS(objectName.get(i).getType())); 00185 for (int j = 0; j < synsets.size(); j++) { 00186 ArrayList<String> concepts = OpenCyc.getInstance() 00187 .getCycConceptFromWordNetID( 00188 "N" + synsets.get(j)); 00189 for (int k = 0; k < concepts.size(); k++) { 00190 if (OpenCyc.getInstance().isaInUnivVocMt( 00191 concepts.get(k), "UnitOfMeasureConcept") 00192 && i == 0) { 00193 System.out.println("Recognized quantifier: " 00194 + objectName.get(i) + " (" 00195 + concepts.get(k) + ")"); 00196 object.getQuantifier().setMeasure( 00197 objectName.get(i)); 00198 objectName.remove(i); 00199 objectName = getNounCompositions(objectName); 00200 if (objectName.size() == 1) 00201 step = max_steps; 00202 00203 break; 00204 } 00205 } 00206 } 00207 } 00208 break; 00209 } 00210 00211 case 4: 00212 break; 00213 } 00214 } 00215 00216 // ================================================================== 00217 // 6. Step: Remove all the adjectives from the Object's name 00218 // ================================================================== 00219 for (int i = 0; i < objectName.size(); i++) { 00220 if (objectName.get(i).getType() == Word.TYPE_ADJ 00221 || objectName.get(i).getType() == Word.TYPE_PAST_PARTICIPLE) { 00222 object.getAdjectives().add(objectName.get(i)); 00223 objectName.remove(i--); 00224 } 00225 } 00226 00227 // ================================================================== 00228 // 5. Step: Nest Objects using prepositional "of" relations 00229 // ================================================================== 00230 if (objectName.size() > 1) { 00231 ObjectX last = null; 00232 System.out.println("object name size " + objectName.size()); 00233 for (int i = 0; i < objectName.size(); i++) { 00234 System.out.println("Building new object " + objectName.get(i)); 00235 ObjectX newOb = new ObjectX(); 00236 ArrayList<Word> name = new ArrayList<Word>(); 00237 name.add(objectName.get(i)); 00238 newOb.setName(name); 00239 if (last != null) { 00240 Preposition p = new Preposition(); 00241 p.addPreposition(new Word(Word.TYPE_PREPOSITION, "of")); 00242 p.addObject(last); 00243 newOb.addPreposition(p); 00244 System.out.println("adding prp: " + p.toString()); 00245 } 00246 last = newOb; 00247 } 00248 object.getPrepositions().addAll(last.getPrepositions()); 00249 objectName = last.getName(); 00250 } 00251 00252 // ================================================================== 00253 // 7. Step: Check if the word now can be recognized 00254 // ================================================================== 00255 // if ( objectName.size() > 1 || objectName.size() == 0 ) { 00256 // String w = ""; 00257 // for ( int i = 0; i < objectName.size(); i++ ) 00258 // w = ( w + " " + objectName.get( i ).getLabel() ).trim(); 00259 // throw new UnknownWordException( "The word \"" + w + "\" could not be 00260 // recognized" ); 00261 // } 00262 00263 object.setName(objectName); 00264 // System.out.println(object); 00265 } 00266 00273 private ArrayList<Word> getNouns(List<Word> words) { 00274 00275 ArrayList<Word> nouns = new ArrayList<Word>(); 00276 for (int i = 0; i < words.size(); i++) 00277 if (words.get(i).getType() == Word.TYPE_NOUN) 00278 nouns.add(words.get(i)); 00279 return nouns; 00280 } 00281 00289 private List<Word> getNounCompositions(List<Word> oldList) throws Exception { 00290 00291 ArrayList<Word> nouns = getNouns(oldList); 00292 ArrayList<Word> newList = new ArrayList<Word>(); 00293 00294 for (int i = 0; i < oldList.size(); i++) { 00295 if (oldList.get(i).getType() == Word.TYPE_ADJ 00296 || oldList.get(i).getType() == Word.TYPE_PAST_PARTICIPLE) 00297 newList.add(oldList.get(i)); 00298 } 00299 00300 for (int i = 0; i < nouns.size(); i++) { 00301 00302 Word objectName = new Word(nouns.get(i).getType(), nouns.get(i) 00303 .getLabel()); 00304 Word tmpObjName = new Word(objectName.getType(), objectName 00305 .getLabel()); 00306 int j = i + 1; 00307 while (j <= nouns.size()) { 00308 00309 if (WordNetRDF2.wordExistsAs(tmpObjName.getLabel(), WordNetRDF2 00310 .convertPOS(Word.TYPE_NOUN))) { 00311 00312 objectName.setLabel(tmpObjName.getLabel()); 00313 if (j < nouns.size()) 00314 tmpObjName.setLabel(tmpObjName.getLabel() + " " 00315 + nouns.get(j++).getLabel()); 00316 else { 00317 i = j - 1; 00318 break; 00319 } 00320 } else { 00321 i = j - 2; 00322 break; 00323 } 00324 00325 } 00326 newList.add(objectName); 00327 objectName.setSynSetIDs(WordNetRDF2.getSynsets( 00328 objectName.getLabel(), WordNetRDF2.convertPOS(objectName 00329 .getType()))); 00330 } 00331 00332 return newList; 00333 } 00334 }