$search
package instruction.wordnet;

import instruction.exceptions.WordNetException;
import instruction.semanticObjects.Word;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map;
import java.util.Vector;

import jpl.Query;

/**
 * Interface to the WordNet 2.0 RDF database, queried through the JPL bridge
 * to SWI-Prolog. Provides lookup of synsets for a word, stemming of regular
 * and irregular word forms, and existence checks per part of speech.
 *
 * NOTE(review): the static initializer that loads the Prolog/RDF modules and
 * populates the irregular-form caches is commented out below; until it (or an
 * equivalent) runs, {@link #irregularVerbs} and {@link #irregularNouns} stay
 * {@code null} and all Prolog queries presume the engine was initialized by
 * the embedding application — TODO confirm.
 */
public class WordNetRDF {

    /** RDF type URI of noun synsets in the WordNet 2.0 schema. */
    public static final String RDF_TYPE_NOUN_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/NounSynset";

    /** RDF type URI of verb synsets in the WordNet 2.0 schema. */
    public static final String RDF_TYPE_VERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/VerbSynset";

    /** RDF type URI of adjective (satellite) synsets in the WordNet 2.0 schema. */
    public static final String RDF_TYPE_ADJECTIVE_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdjectiveSatelliteSynset";

    /** RDF type URI of adverb synsets in the WordNet 2.0 schema. */
    public static final String RDF_TYPE_ADVERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdverbSynset";

    /** Flag intended to toggle caching of irregular forms (currently unused). */
    public static final boolean CACHE_IRREGULAR_FORMS = true;

    /** Maps irregular verb forms to their stems; {@code null} until cached. */
    protected static HashMap<String, String> irregularVerbs = null;

    /** Maps irregular noun forms to their stems; {@code null} until cached. */
    protected static HashMap<String, String> irregularNouns = null;

    /*
     * Original (disabled) initialization of the SWI-Prolog engine and the
     * WordNet RDF database. NOTE(review): it also calls cacheIrregularVerbs(),
     * which is not defined in this file — confirm where it lives before
     * re-enabling.
     *
     * static {
     *     try {
     *         Vector<String> args = new Vector<String>(Arrays.asList(Prolog
     *                 .get_default_init_args()));
     *         args.add("-G128M");
     *         // args.add("-q");
     *         args.add("-nosignals");
     *         Prolog.set_default_init_args(args.toArray(new String[0]));
     *
     *         // load the startup files and modules for WordNet
     *         new Query("ensure_loaded('"
     *                 + absoluteFilePath("etc/wordnet/wordnet.pl") + "')")
     *                 .oneSolution();
     *         new Query("rdf_db:rdf_load('"
     *                 + absoluteFilePath("etc/wordnet/wordnet-synset.rdf" + "')"))
     *                 .oneSolution();
     *
     *         cacheIrregularNouns();
     *         cacheIrregularVerbs();
     *
     *     } catch (Exception e) {
     *         e.printStackTrace();
     *     }
     * }
     */

    /**
     * Determines the stem of a (possibly inflected) word for the given part of
     * speech by trying the word itself, regular suffix stripping ("-ed"/"-d"
     * for verbs, "-s"/"-es" for nouns) and finally the irregular-form caches.
     *
     * @param word the word to stem; matched case-insensitively
     * @param pos one of the RDF_TYPE_*_SYNSET constants
     * @return the stem found in WordNet, or {@code null} if none exists
     */
    public static String getWordStem(String word, String pos) {

        word = word.toLowerCase();

        String stem = null;

        boolean found = false;

        if (pos.equals(RDF_TYPE_VERB_SYNSET)) {
            stem = word;
            found = wordExactlyExistsAs(stem, pos);

            // regular past tense: "chopped" -> "choppe" -> "chopp" heuristics
            if (!found && word.endsWith("ed")) {
                stem = word.substring(0, word.length() - 1);
                found = wordExactlyExistsAs(stem, pos);
                if (!found) {
                    stem = word.substring(0, word.length() - 2);
                    found = wordExactlyExistsAs(stem, pos);
                }
            }

            // FIX: guard against the cache never having been initialized
            // (the static initializer is disabled) — previously an NPE.
            if (!found && irregularVerbs != null
                    && irregularVerbs.get(word) != null) {
                stem = irregularVerbs.get(word);
                found = wordExactlyExistsAs(stem, pos);
            }
        }

        else if (pos.equals(RDF_TYPE_ADVERB_SYNSET)) {
            stem = word;
            found = wordExactlyExistsAs(stem, pos);
        }

        else if (pos.equals(RDF_TYPE_ADJECTIVE_SYNSET)) {
            stem = word;
            found = wordExactlyExistsAs(stem, pos);
        }

        else if (pos.equals(RDF_TYPE_NOUN_SYNSET)) {
            stem = word;
            found = wordExactlyExistsAs(stem, pos);
            if (!found) {
                // regular plural forms
                if (word.endsWith("s")) {
                    stem = word.substring(0, word.length() - 1);
                    found = wordExactlyExistsAs(stem, pos);
                }
                if (!found && word.endsWith("es")) {
                    stem = word.substring(0, word.length() - 2);
                    found = wordExactlyExistsAs(stem, pos);
                }
                // FIX: same null guard as for the verb cache above.
                if (!found && irregularNouns != null
                        && irregularNouns.get(word) != null) {
                    stem = irregularNouns.get(word);
                    found = wordExactlyExistsAs(stem, pos);
                }
            }
        }

        if (found)
            return stem;
        else
            return null;
    }

    /**
     * Checks whether the word — or any stem of it — exists in WordNet with
     * the given part of speech.
     *
     * @param word the word (any inflected form)
     * @param pos one of the RDF_TYPE_*_SYNSET constants
     * @return {@code true} if the word or its stem is known to WordNet
     */
    public static boolean wordExistsAs(String word, String pos) {

        boolean found = wordExactlyExistsAs(word, pos);

        if (!found) {
            // getWordStem only returns a stem it has already verified against
            // WordNet, so a non-null result means the word exists; the
            // original re-queried the database redundantly here.
            found = getWordStem(word, pos) != null;
        }

        return found;
    }

    /**
     * Maps a {@link Word} type constant to the corresponding WordNet synset
     * type URI.
     *
     * @param type one of the {@code Word.TYPE_*} constants
     * @return the matching RDF_TYPE_*_SYNSET constant, or {@code null} for
     *         unmapped types
     */
    public static String convertPOS(int type) {

        String pos = null;

        if (type == Word.TYPE_VERB_INFINITIVE
                || type == Word.TYPE_PAST_PARTICIPLE
                || type == Word.TYPE_GERUND)
            pos = RDF_TYPE_VERB_SYNSET;
        else if (type == Word.TYPE_NOUN)
            pos = RDF_TYPE_NOUN_SYNSET;
        else if (type == Word.TYPE_ADV)
            pos = RDF_TYPE_ADVERB_SYNSET;
        else if (type == Word.TYPE_ADJ)
            pos = RDF_TYPE_ADJECTIVE_SYNSET;

        return pos;
    }

    /**
     * Checks whether exactly this word form (no stemming) has at least one
     * synset in WordNet.
     *
     * @param word the exact word form; {@code null} yields {@code false}
     * @param pos one of the RDF_TYPE_*_SYNSET constants (currently not used
     *            by the underlying query — see {@link #getSynsets})
     * @return {@code true} if at least one synset is found
     */
    public static boolean wordExactlyExistsAs(String word, String pos) {

        if (word == null)
            return false;

        return getSynsets(word, pos).size() > 0;
    }

    /**
     * Loads the irregular-noun exception list ({@code noun.exc}) into
     * {@link #irregularNouns}.
     *
     * FIX: reads the file line-by-line with a BufferedReader instead of
     * splitting the whole file on the platform line separator (which broke
     * whenever the separator did not match the file's line endings) and
     * closes the reader in all cases (the original leaked it).
     *
     * @throws IOException if the file cannot be read
     * @throws FileNotFoundException if the mapping file does not exist
     * @throws WordNetException if a line is not a "form stem" pair
     */
    @SuppressWarnings("unused")
    private static void cacheIrregularNouns() throws IOException,
            WordNetException {
        File noun_exc = new File(absoluteFilePath("etc/wordnet/noun.exc"));
        if (!noun_exc.exists())
            throw new FileNotFoundException(
                    "Mapping File for irregular Noun forms ("
                            + noun_exc.getAbsolutePath() + ") not found");

        irregularNouns = new HashMap<String, String>();

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(noun_exc));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0)
                    continue; // tolerate blank lines
                String[] keyValueSet = line.split(" ");
                if (keyValueSet.length < 2)
                    throw new WordNetException("Illegal Mapping entry in "
                            + noun_exc.getName());
                irregularNouns.put(keyValueSet[0], keyValueSet[1]);
            }
        } finally {
            if (reader != null)
                reader.close();
        }
    }

    /*
     * Original (disabled) hand-maintained mappings for words missing from
     * WordNet:
     *
     * public static void initializeMissingMappings() {
     *
     *     missingNouns = new HashMap<String, String>();
     *     missingNouns.put("stove burner", "04163507");
     *     missingNouns.put("stove top", "TUM00001");
     *     missingNouns.put("eating bowl", "02778691");
     *     missingNouns.put("packet soup", "07115585");
     *     missingNouns.put("contents", "07474342");
     *     missingNouns.put("iced tea", "N07456631");
     *
     *     missingVerbs = new HashMap<String, String>();
     *     missingVerbs.put("look at", "TUM00002");
     *     missingVerbs.put("bring out", "01145176");
     * }
     */

    /**
     * Resolves a path relative to the working directory to an absolute path
     * with forward slashes (as required in Prolog string literals).
     *
     * @param relativePath path relative to the current working directory
     * @return absolute path using {@code /} as separator
     */
    public static String absoluteFilePath(String relativePath) {
        return new File(relativePath).getAbsolutePath().replaceAll("\\\\", "/");
    }

    /**
     * Executes a Prolog query via JPL and collects all solutions, column-wise:
     * one vector of bindings per variable name.
     *
     * @param query the Prolog goal to run (wrapped in expand_goal/call)
     * @param plFile unused; kept for interface compatibility with callers
     * @return map from variable name to the values it was bound to across all
     *         solutions; empty map if there are no (non-trivial) solutions
     */
    private static Map<String, Vector<Object>> executeQuery(String query,
            String plFile) {

        HashMap<String, Vector<Object>> result = new HashMap<String, Vector<Object>>();

        Query q = new Query("expand_goal((" + query + "),_9), call(_9)");

        // Due to bugs we have to check for one answer beforehand.
        if (!q.hasMoreSolutions())
            return result;
        Hashtable oneSolution = q.nextSolution();
        // Due to a bug, consulting a file without anything else results in
        // shutdown; an empty binding set is treated as "no result".
        if (oneSolution.isEmpty())
            return result;

        // Restart the query and fetch everything.
        q.rewind();
        Hashtable[] solutions = q.allSolutions();

        // Pre-create a column for every variable bound in the first solution.
        for (Object key : solutions[0].keySet()) {
            result.put(key.toString(), new Vector<Object>());
        }

        // Build the result column by column.
        for (int i = 0; i < solutions.length; i++) {
            Hashtable solution = solutions[i];
            for (Object key : solution.keySet()) {
                String keyStr = key.toString();

                Vector<Object> resultVector = result.get(keyStr);
                if (resultVector == null) {
                    // Column not present in the first solution.
                    resultVector = new Vector<Object>();
                    result.put(keyStr, resultVector);
                }
                // FIX: append exactly once. The original inserted the value
                // twice for newly discovered columns at i == 0, and
                // Vector.add(i, ...) on a fresh vector threw
                // ArrayIndexOutOfBoundsException for columns first appearing
                // at i > 0 (index > size).
                resultVector.add(solution.get(key).toString());
            }
        }
        return result;
    }

    /**
     * Retrieves the synset URIs of a word.
     *
     * @param word the word to look up (lower-cased before querying)
     * @param pos intended part-of-speech filter; currently ignored — the
     *            type-filtered query is commented out below
     * @return list of synset URIs, empty if the word is unknown
     */
    public static ArrayList<String> getSynsets(String word, String pos) {

        Map<String, Vector<Object>> results;

        // if (pos == null)
        results = executeQuery("comp_ehow:word_has_synset_uri('"
                + word.toLowerCase() + "', Synsets)", null);
        // else
        //     results = executeQuery("word_has_synset_of_type('"
        //             + word.toLowerCase() + "', Synsets, '" + pos + "')", null);

        Vector<Object> synsets = results.get("Synsets");

        if (synsets == null)
            return new ArrayList<String>();

        ArrayList<String> synsetArray = new ArrayList<String>();

        // Strip the quote characters Prolog puts around atoms.
        for (int i = 0; i < synsets.size(); i++)
            synsetArray.add(((String) synsets.get(i)).replaceAll("'", ""));

        return synsetArray;
    }

    /**
     * Resolves a synset ID to its synset URI.
     *
     * @param synsetID the WordNet synset ID
     * @return the synset URI, or {@code null} if the ID is unknown
     */
    public static String getSynsetFromID(String synsetID) {
        Map<String, Vector<Object>> results;

        results = executeQuery("comp_ehow:synset_from_id(Synset,'" + synsetID
                + "')", null);

        Vector<Object> synsets = results.get("Synset");

        if (synsets == null || synsets.size() == 0)
            return null;
        else
            return ((String) synsets.get(0)).replaceAll("'", "");
    }

    /**
     * Reads one line from standard input, dropping CR/LF characters.
     *
     * @return the line read, or {@code null} on I/O error or end of stream
     *         with no data
     */
    public static String readLine() {
        try {
            StringBuilder str = new StringBuilder();
            int read = System.in.read();

            // FIX: also stop on EOF (-1). The original compared
            // (char) read != '\n', and (char) -1 is '\uFFFF', so the loop
            // never terminated once stdin was closed.
            while (read != -1 && (char) read != '\n') {
                if ((char) read != '\r')
                    str.append((char) read);

                read = System.in.read();
            }
            if (read == -1 && str.length() == 0)
                return null; // end of stream, nothing read
            return str.toString();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Interactive test loop: reads words from stdin and prints their verb
     * synsets until "exit" is entered or the input stream ends.
     */
    public static void main(String[] args) {

        do {
            String read = readLine();
            // FIX: readLine() can return null (EOF / I/O error); the original
            // dereferenced it unconditionally and threw an NPE.
            if (read == null || read.equals("exit"))
                break;
            if (wordExistsAs(read, RDF_TYPE_VERB_SYNSET)) {
                System.out.println(getSynsets(read, RDF_TYPE_VERB_SYNSET));
            }
        } while (true);
    }
}