00001 package instruction.wordnet;
00002
00003 import instruction.exceptions.WordNetException;
00004 import instruction.semanticObjects.Word;
00005
00006 import java.io.File;
00007 import java.io.FileNotFoundException;
00008 import java.io.FileReader;
00009 import java.io.IOException;
00010 import java.net.MalformedURLException;
00011 import java.util.ArrayList;
00012 import java.util.HashMap;
00013 import java.util.Hashtable;
00014 import java.util.Map;
00015 import java.util.Vector;
00016 import jpl.Query;
00017
00027 public class WordNetRDF {
00028
/** RDF type URI (WordNet 2.0 W3C schema namespace) used as the part-of-speech tag for nouns. */
00030 public static final String RDF_TYPE_NOUN_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/NounSynset";
00031
/** RDF type URI used as the part-of-speech tag for verbs. */
00033 public static final String RDF_TYPE_VERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/VerbSynset";
00034
/**
 * RDF type URI used as the part-of-speech tag for adjectives.
 * NOTE(review): the value names "AdjectiveSatelliteSynset", not a general
 * adjective synset type - confirm this is the intended class.
 */
00036 public static final String RDF_TYPE_ADJECTIVE_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdjectiveSatelliteSynset";
00037
/** RDF type URI used as the part-of-speech tag for adverbs. */
00039 public static final String RDF_TYPE_ADVERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdverbSynset";
00040
/**
 * Flag for caching irregular word forms. It is not read anywhere in the
 * visible part of this class.
 */
00041 public static final boolean CACHE_IRREGULAR_FORMS = true;
00042
/**
 * Mapping from an irregular verb form to its stem, consulted by getWordStem.
 * Starts out as null; no visible code assigns it.
 */
00043 protected static HashMap<String, String> irregularVerbs = null;
00044
/**
 * Mapping from an irregular noun form to its stem, consulted by getWordStem.
 * Starts out as null; the only visible assignment is in cacheIrregularNouns().
 */
00045 protected static HashMap<String, String> irregularNouns = null;
00046
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00079 public static String getWordStem(String word, String pos) {
00080
00081 word = word.toLowerCase();
00082
00083 String stem = null;
00084
00085 boolean found = false;
00086
00087 if (pos.equals(RDF_TYPE_VERB_SYNSET)) {
00088 stem = word;
00089 found = wordExactlyExistsAs(stem, pos);
00090
00091 if (!found && word.endsWith("ed")) {
00092 stem = word.substring(0, word.length() - 1);
00093 found = wordExactlyExistsAs(stem, pos);
00094 if (!found) {
00095 stem = word.substring(0, word.length() - 2);
00096 found = wordExactlyExistsAs(stem, pos);
00097 }
00098 }
00099
00100 if (!found) {
00101 if (irregularVerbs.get(word) != null) {
00102 stem = irregularVerbs.get(word);
00103 found = wordExactlyExistsAs(stem, pos);
00104 }
00105 }
00106 }
00107
00108 else if (pos.equals(RDF_TYPE_ADVERB_SYNSET)) {
00109 stem = word;
00110 found = wordExactlyExistsAs(stem, pos);
00111 }
00112
00113 else if (pos.equals(RDF_TYPE_ADJECTIVE_SYNSET)) {
00114 stem = word;
00115 found = wordExactlyExistsAs(stem, pos);
00116 }
00117
00118 else if (pos.equals(RDF_TYPE_NOUN_SYNSET)) {
00119 stem = word;
00120 found = wordExactlyExistsAs(stem, pos);
00121 if (!found) {
00122 if (word.endsWith("s")) {
00123 stem = word.substring(0, word.length() - 1);
00124 found = wordExactlyExistsAs(stem, pos);
00125 }
00126 if (!found && word.endsWith("es")) {
00127 stem = word.substring(0, word.length() - 2);
00128 found = wordExactlyExistsAs(stem, pos);
00129 }
00130 if (!found) {
00131 if (irregularNouns.get(word) != null) {
00132 stem = irregularNouns.get(word);
00133 found = wordExactlyExistsAs(stem, pos);
00134 }
00135 }
00136 }
00137 }
00138
00139 if (found)
00140 return stem;
00141 else
00142 return null;
00143 }
00144
00157 public static boolean wordExistsAs(String word, String pos) {
00158
00159 boolean found = wordExactlyExistsAs(word, pos);
00160
00161 if (!found) {
00162
00163 String stem = getWordStem(word, pos);
00164
00165 found = wordExactlyExistsAs(stem, pos);
00166 }
00167
00168 return found;
00169 }
00170
00171 public static String convertPOS(int type) {
00172
00173 String pos = null;
00174
00175 if (type == Word.TYPE_VERB_INFINITIVE
00176 || type == Word.TYPE_PAST_PARTICIPLE
00177 || type == Word.TYPE_GERUND)
00178 pos = RDF_TYPE_VERB_SYNSET;
00179 else if (type == Word.TYPE_NOUN)
00180 pos = RDF_TYPE_NOUN_SYNSET;
00181 else if (type == Word.TYPE_ADV)
00182 pos = RDF_TYPE_ADVERB_SYNSET;
00183 else if (type == Word.TYPE_ADJ)
00184 pos = RDF_TYPE_ADJECTIVE_SYNSET;
00185
00186 return pos;
00187 }
00188
00200 public static boolean wordExactlyExistsAs(String word, String pos) {
00201
00202 if (word == null)
00203 return false;
00204
00205 return getSynsets(word, pos).size() > 0;
00206 }
00207
00208 @SuppressWarnings("unused")
00209 private static void cacheIrregularNouns() throws IOException,
00210 WordNetException {
00211 File noun_exc = new File(absoluteFilePath("etc/wordnet/noun.exc"));
00212 if (!noun_exc.exists())
00213 throw new FileNotFoundException(
00214 "Mapping File for irregular Noun forms ("
00215 + noun_exc.getAbsolutePath() + ") not found");
00216 else {
00217 FileReader reader = new FileReader(noun_exc);
00218 StringBuilder file = new StringBuilder();
00219 char[] buffer = new char[1024];
00220 int read = 0;
00221 while ((read = reader.read(buffer)) >= 0)
00222 file.append(buffer, 0, read);
00223 irregularNouns = new HashMap<String, String>();
00224
00225 String[] mappings = file.toString().split(
00226 System.getProperty("line.separator"));
00227 for (int i = 0; i < mappings.length; i++) {
00228 String[] keyValueSet = mappings[i].split(" ");
00229 if (keyValueSet.length < 2)
00230 throw new WordNetException("Illegal Mapping entry in "
00231 + noun_exc.getName());
00232 else {
00233 irregularNouns.put(keyValueSet[0], keyValueSet[1]);
00234 }
00235 }
00236
00237 }
00238 }
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256 public static String absoluteFilePath(String relativePath) {
00257 return new File(relativePath).getAbsolutePath().replaceAll("\\\\", "/");
00258 }
00259
00268 private static Map<String, Vector<Object>> executeQuery(String query,
00269 String plFile) {
00270
00271
00272
00273 HashMap<String, Vector<Object>> result = new HashMap<String, Vector<Object>>();
00274 Hashtable[] solutions;
00275
00276 Query q = new Query("expand_goal((" + query + "),_9), call(_9)");
00277
00278
00279 if (!q.hasMoreSolutions())
00280 return new HashMap<String, Vector<Object>>();
00281 Hashtable oneSolution = q.nextSolution();
00282 if (oneSolution.isEmpty())
00283
00284 return new HashMap<String, Vector<Object>>();
00285
00286
00287
00288
00289 q.rewind();
00290 solutions = q.allSolutions();
00291
00292 for (Object key : solutions[0].keySet()) {
00293 result.put(key.toString(), new Vector<Object>());
00294 }
00295
00296
00297 for (int i = 0; i < solutions.length; i++) {
00298 Hashtable solution = solutions[i];
00299 for (Object key : solution.keySet()) {
00300 String keyStr = key.toString();
00301
00302 if (!result.containsKey(keyStr)) {
00303
00304
00305 Vector<Object> resultVector = new Vector<Object>();
00306 resultVector.add(i, solution.get(key).toString());
00307 result.put(keyStr, resultVector);
00308
00309 }
00310
00311 Vector<Object> resultVector = result.get(keyStr);
00312 resultVector.add(i, solution.get(key).toString());
00313 }
00314 }
00315
00316 return result;
00317 }
00318
00329 public static ArrayList<String> getSynsets(String word, String pos) {
00330
00331 Map<String, Vector<Object>> results;
00332
00333
00334 results = executeQuery("comp_ehow:word_has_synset_uri('" + word.toLowerCase()
00335 + "', Synsets)", null);
00336
00337
00338
00339
00340 Vector<Object> synsets = results.get("Synsets");
00341
00342 if (synsets == null)
00343 return new ArrayList<String>();
00344
00345 ArrayList<String> synsetArray = new ArrayList<String>();
00346
00347 for (int i = 0; i < synsets.size(); i++)
00348 synsetArray.add(((String) synsets.get(i)).replaceAll("'", ""));
00349
00350 return synsetArray;
00351 }
00352
00353 public static String getSynsetFromID(String synsetID) {
00354 Map<String, Vector<Object>> results;
00355
00356 results = executeQuery("comp_ehow:synset_from_id(Synset,'" + synsetID + "')",
00357 null);
00358
00359 Vector<Object> synsets = results.get("Synset");
00360
00361 if (synsets == null || synsets.size() == 0)
00362 return null;
00363 else
00364 return ((String)synsets.get(0)).replaceAll("'", "");
00365 }
00366
00367 public static String readLine() {
00368 try {
00369 StringBuilder str = new StringBuilder();
00370 int read;
00371 read = System.in.read();
00372
00373 while ((char) read != '\n') {
00374 if (((char) read) != '\r' && (char) read != '\n')
00375 str.append((char) read);
00376
00377 read = System.in.read();
00378 }
00379 return str.toString();
00380 } catch (IOException e) {
00381 e.printStackTrace();
00382 }
00383 return null;
00384 }
00385
00386 public static void main(String[] args) {
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 do {
00411 String read = readLine();
00412 if (read.equals("exit"))
00413 break;
00414 if (wordExistsAs(read, RDF_TYPE_VERB_SYNSET)) {
00415 System.out.println(getSynsets(read, RDF_TYPE_VERB_SYNSET));
00416 }
00417 } while (true);
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433 }
00434 }