00001 package instruction.wordnet;
00002 
00003 import instruction.exceptions.WordNetException;
00004 import instruction.semanticObjects.Word;
00005 
00006 import java.io.File;
00007 import java.io.FileNotFoundException;
00008 import java.io.FileReader;
00009 import java.io.IOException;
00010 import java.net.MalformedURLException;
00011 import java.util.ArrayList;
00012 import java.util.HashMap;
00013 import java.util.Hashtable;
00014 import java.util.Map;
00015 import java.util.Vector;
00016 import jpl.Query;
00017 
00027 public class WordNetRDF {
00028
              // Part-of-speech identifier for nouns: a URI string ending in
              // "NounSynset". getWordStem() compares its pos argument against it.
00030         public static final String RDF_TYPE_NOUN_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/NounSynset";
00031
              // Part-of-speech identifier for verbs: a URI string ending in
              // "VerbSynset". convertPOS() returns it for infinitive, past
              // participle and gerund word types.
00033         public static final String RDF_TYPE_VERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/VerbSynset";
00034
              // Part-of-speech identifier for adjectives. Note that the URI names
              // "AdjectiveSatelliteSynset", not a plain adjective synset class.
00036         public static final String RDF_TYPE_ADJECTIVE_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdjectiveSatelliteSynset";
00037
              // Part-of-speech identifier for adverbs: a URI string ending in
              // "AdverbSynset".
00039         public static final String RDF_TYPE_ADVERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdverbSynset";
00040
              // NOTE(review): not read anywhere in the visible code; presumably it
              // was meant to switch the loading of the irregular-form maps below
              // on or off -- confirm before relying on it.
00041         public static final boolean CACHE_IRREGULAR_FORMS = true;
00042
              // Irregular verb form -> base form. Read by getWordStem(). It starts
              // out null and no visible code assigns it.
00043         protected static HashMap<String, String> irregularVerbs = null;
00044
              // Irregular noun form -> base form. Read by getWordStem() and filled
              // by cacheIrregularNouns(); null until that method has run.
00045         protected static HashMap<String, String> irregularNouns = null;
00046 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 
00064 
00065 
00066 
00067 
00068 
00069 
00070 
00071 
00072 
00073 
00074 
00079         public static String getWordStem(String word, String pos) {
00080 
00081                 word = word.toLowerCase();
00082 
00083                 String stem = null;
00084 
00085                 boolean found = false;
00086 
00087                 if (pos.equals(RDF_TYPE_VERB_SYNSET)) {
00088                         stem = word;
00089                         found = wordExactlyExistsAs(stem, pos);
00090 
00091                         if (!found && word.endsWith("ed")) {
00092                                 stem = word.substring(0, word.length() - 1);
00093                                 found = wordExactlyExistsAs(stem, pos);
00094                                 if (!found) {
00095                                         stem = word.substring(0, word.length() - 2);
00096                                         found = wordExactlyExistsAs(stem, pos);
00097                                 }
00098                         }
00099 
00100                         if (!found) {
00101                                 if (irregularVerbs.get(word) != null) {
00102                                         stem = irregularVerbs.get(word);
00103                                         found = wordExactlyExistsAs(stem, pos);
00104                                 }
00105                         }
00106                 }
00107 
00108                 else if (pos.equals(RDF_TYPE_ADVERB_SYNSET)) {
00109                         stem = word;
00110                         found = wordExactlyExistsAs(stem, pos);
00111                 }
00112 
00113                 else if (pos.equals(RDF_TYPE_ADJECTIVE_SYNSET)) {
00114                         stem = word;
00115                         found = wordExactlyExistsAs(stem, pos);
00116                 }
00117 
00118                 else if (pos.equals(RDF_TYPE_NOUN_SYNSET)) {
00119                         stem = word;
00120                         found = wordExactlyExistsAs(stem, pos);
00121                         if (!found) {
00122                                 if (word.endsWith("s")) {
00123                                         stem = word.substring(0, word.length() - 1);
00124                                         found = wordExactlyExistsAs(stem, pos);
00125                                 }
00126                                 if (!found && word.endsWith("es")) {
00127                                         stem = word.substring(0, word.length() - 2);
00128                                         found = wordExactlyExistsAs(stem, pos);
00129                                 }
00130                                 if (!found) {
00131                                         if (irregularNouns.get(word) != null) {
00132                                                 stem = irregularNouns.get(word);
00133                                                 found = wordExactlyExistsAs(stem, pos);
00134                                         }
00135                                 }
00136                         }
00137                 }
00138 
00139                 if (found)
00140                         return stem;
00141                 else
00142                         return null;
00143         }
00144 
00157         public static boolean wordExistsAs(String word, String pos) {
00158 
00159                 boolean found = wordExactlyExistsAs(word, pos);
00160 
00161                 if (!found) {
00162 
00163                         String stem = getWordStem(word, pos);
00164 
00165                         found = wordExactlyExistsAs(stem, pos);
00166                 }
00167 
00168                 return found;
00169         }
00170 
00171         public static String convertPOS(int type) {
00172 
00173                 String pos = null;
00174 
00175                 if (type == Word.TYPE_VERB_INFINITIVE
00176                                 || type == Word.TYPE_PAST_PARTICIPLE
00177                                 || type == Word.TYPE_GERUND)
00178                         pos = RDF_TYPE_VERB_SYNSET;
00179                 else if (type == Word.TYPE_NOUN)
00180                         pos = RDF_TYPE_NOUN_SYNSET;
00181                 else if (type == Word.TYPE_ADV)
00182                         pos = RDF_TYPE_ADVERB_SYNSET;
00183                 else if (type == Word.TYPE_ADJ)
00184                         pos = RDF_TYPE_ADJECTIVE_SYNSET;
00185 
00186                 return pos;
00187         }
00188 
00200         public static boolean wordExactlyExistsAs(String word, String pos) {
00201 
00202                 if (word == null)
00203                         return false;
00204 
00205                 return getSynsets(word, pos).size() > 0;
00206         }
00207 
00208         @SuppressWarnings("unused")
00209         private static void cacheIrregularNouns() throws IOException,
00210                         WordNetException {
00211                 File noun_exc = new File(absoluteFilePath("etc/wordnet/noun.exc"));
00212                 if (!noun_exc.exists())
00213                         throw new FileNotFoundException(
00214                                         "Mapping File for irregular Noun forms ("
00215                                                         + noun_exc.getAbsolutePath() + ") not found");
00216                 else {
00217                         FileReader reader = new FileReader(noun_exc);
00218                         StringBuilder file = new StringBuilder();
00219                         char[] buffer = new char[1024];
00220                         int read = 0;
00221                         while ((read = reader.read(buffer)) >= 0)
00222                                 file.append(buffer, 0, read);
00223                         irregularNouns = new HashMap<String, String>();
00224 
00225                         String[] mappings = file.toString().split(
00226                                         System.getProperty("line.separator"));
00227                         for (int i = 0; i < mappings.length; i++) {
00228                                 String[] keyValueSet = mappings[i].split(" ");
00229                                 if (keyValueSet.length < 2)
00230                                         throw new WordNetException("Illegal Mapping entry in "
00231                                                         + noun_exc.getName());
00232                                 else {
00233                                         irregularNouns.put(keyValueSet[0], keyValueSet[1]);
00234                                 }
00235                         }
00236 
00237                 }
00238         }
00239 
00240         
00241         
00242         
00243         
00244         
00245         
00246         
00247         
00248         
00249         
00250         
00251         
00252         
00253         
00254         
00255 
00256         public static String absoluteFilePath(String relativePath) {
00257                 return new File(relativePath).getAbsolutePath().replaceAll("\\\\", "/");
00258         }
00259 
00268         private static Map<String, Vector<Object>> executeQuery(String query,
00269                         String plFile) {
00270 
00271                 
00272 
00273                 HashMap<String, Vector<Object>> result = new HashMap<String, Vector<Object>>();
00274                 Hashtable[] solutions;
00275 
00276                 Query q = new Query("expand_goal((" + query + "),_9), call(_9)");
00277 
00278                 
00279                 if (!q.hasMoreSolutions())
00280                         return new HashMap<String, Vector<Object>>();
00281                 Hashtable oneSolution = q.nextSolution();
00282                 if (oneSolution.isEmpty()) 
00283                         
00284                         return new HashMap<String, Vector<Object>>(); 
00285                 
00286                 
00287 
00288                 
00289                 q.rewind();
00290                 solutions = q.allSolutions();
00291 
00292                 for (Object key : solutions[0].keySet()) {
00293                         result.put(key.toString(), new Vector<Object>());
00294                 }
00295 
00296                 
00297                 for (int i = 0; i < solutions.length; i++) {
00298                         Hashtable solution = solutions[i];
00299                         for (Object key : solution.keySet()) {
00300                                 String keyStr = key.toString();
00301 
00302                                 if (!result.containsKey(keyStr)) {
00303 
00304                                         
00305                                         Vector<Object> resultVector = new Vector<Object>();
00306                                         resultVector.add(i, solution.get(key).toString());
00307                                         result.put(keyStr, resultVector);
00308 
00309                                 }
00310                                 
00311                                 Vector<Object> resultVector = result.get(keyStr);
00312                                 resultVector.add(i, solution.get(key).toString());
00313                         }
00314                 }
00315                 
00316                 return result;
00317         }
00318 
00329         public static ArrayList<String> getSynsets(String word, String pos) {
00330 
00331                 Map<String, Vector<Object>> results;
00332 
00333 
00334                         results = executeQuery("comp_ehow:word_has_synset_uri('" + word.toLowerCase()
00335                                         + "', Synsets)", null);
00336 
00337 
00338 
00339 
00340                 Vector<Object> synsets = results.get("Synsets");
00341 
00342                 if (synsets == null)
00343                         return new ArrayList<String>();
00344 
00345                 ArrayList<String> synsetArray = new ArrayList<String>();
00346 
00347                 for (int i = 0; i < synsets.size(); i++)
00348                         synsetArray.add(((String) synsets.get(i)).replaceAll("'", ""));
00349 
00350                 return synsetArray;
00351         }
00352 
00353         public static String getSynsetFromID(String synsetID) {
00354                 Map<String, Vector<Object>> results;
00355 
00356                 results = executeQuery("comp_ehow:synset_from_id(Synset,'" + synsetID + "')",
00357                                 null);
00358 
00359                 Vector<Object> synsets = results.get("Synset");
00360 
00361                 if (synsets == null || synsets.size() == 0)
00362                         return null;
00363                 else
00364                         return ((String)synsets.get(0)).replaceAll("'", "");
00365         }
00366 
00367         public static String readLine() {
00368                 try {
00369                         StringBuilder str = new StringBuilder();
00370                         int read;
00371                         read = System.in.read();
00372 
00373                         while ((char) read != '\n') {
00374                                 if (((char) read) != '\r' && (char) read != '\n')
00375                                         str.append((char) read);
00376 
00377                                 read = System.in.read();
00378                         }
00379                         return str.toString();
00380                 } catch (IOException e) {
00381                         e.printStackTrace();
00382                 }
00383                 return null;
00384         }
00385 
00386         public static void main(String[] args) {
00387                 
00388                 
00389                 
00390                 
00391                 
00392                 
00393                 
00394                 
00395                 
00396                 
00397                 
00398                 
00399                 
00400                 
00401                 
00402                 
00403                 
00404                 
00405                 
00406                 
00407                 
00408         
00409 
00410                 do {
00411                         String read = readLine();
00412                         if (read.equals("exit"))
00413                                 break;
00414                         if (wordExistsAs(read, RDF_TYPE_VERB_SYNSET)) {
00415                                 System.out.println(getSynsets(read, RDF_TYPE_VERB_SYNSET));
00416                         }
00417                 } while (true);
00418 
00419                 
00420                 
00421                 
00422                 
00423                 
00424                 
00425                 
00426                 
00427                 
00428                 
00429                 
00430                 
00431                 
00432                 
00433         }
00434 }