WordNetRDF.java
Go to the documentation of this file.
00001 package instruction.wordnet;
00002 
00003 import instruction.exceptions.WordNetException;
00004 import instruction.semanticObjects.Word;
00005 
00006 import java.io.File;
00007 import java.io.FileNotFoundException;
00008 import java.io.FileReader;
00009 import java.io.IOException;
00010 import java.net.MalformedURLException;
00011 import java.util.ArrayList;
00012 import java.util.HashMap;
00013 import java.util.Hashtable;
00014 import java.util.Map;
00015 import java.util.Vector;
00016 import jpl.Query;
00017 
00027 public class WordNetRDF {
00028 
00030         public static final String RDF_TYPE_NOUN_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/NounSynset";
00031 
00033         public static final String RDF_TYPE_VERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/VerbSynset";
00034 
00036         public static final String RDF_TYPE_ADJECTIVE_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdjectiveSatelliteSynset";
00037 
00039         public static final String RDF_TYPE_ADVERB_SYNSET = "http://www.w3.org/2006/03/wn/wn20/schema/AdverbSynset";
00040 
00041         public static final boolean CACHE_IRREGULAR_FORMS = true;
00042 
00043         protected static HashMap<String, String> irregularVerbs = null;
00044 
00045         protected static HashMap<String, String> irregularNouns = null;
00046 
00050 //      static {
00051 //              try {
00052 //                      Vector<String> args = new Vector<String>(Arrays.asList(Prolog
00053 //                                      .get_default_init_args()));
00054 //                      args.add("-G128M");
00055 //                      // args.add("-q");
00056 //                      args.add("-nosignals");
00057 //                      Prolog.set_default_init_args(args.toArray(new String[0]));
00058 //
00059 //                      // load the startup files and modules for WordNet
00060 //                      new Query("ensure_loaded('"
00061 //                                      + absoluteFilePath("etc/wordnet/wordnet.pl") + "')")
00062 //                                      .oneSolution();
00063 //                      new Query("rdf_db:rdf_load('"
00064 //                                      + absoluteFilePath("etc/wordnet/wordnet-synset.rdf") + "')")
00065 //                                      .oneSolution();
00066 //
00067 //                      cacheIrregularNouns();
00068 //                      cacheIrregularVerbs();
00069 //
00070 //              } catch (Exception e) {
00071 //                      e.printStackTrace();
00072 //              }
00073 //      }
00074 
00079         public static String getWordStem(String word, String pos) {
00080 
00081                 word = word.toLowerCase();
00082 
00083                 String stem = null;
00084 
00085                 boolean found = false;
00086 
00087                 if (pos.equals(RDF_TYPE_VERB_SYNSET)) {
00088                         stem = word;
00089                         found = wordExactlyExistsAs(stem, pos);
00090 
00091                         if (!found && word.endsWith("ed")) {
00092                                 stem = word.substring(0, word.length() - 1);
00093                                 found = wordExactlyExistsAs(stem, pos);
00094                                 if (!found) {
00095                                         stem = word.substring(0, word.length() - 2);
00096                                         found = wordExactlyExistsAs(stem, pos);
00097                                 }
00098                         }
00099 
00100                         if (!found) {
00101                                 if (irregularVerbs.get(word) != null) {
00102                                         stem = irregularVerbs.get(word);
00103                                         found = wordExactlyExistsAs(stem, pos);
00104                                 }
00105                         }
00106                 }
00107 
00108                 else if (pos.equals(RDF_TYPE_ADVERB_SYNSET)) {
00109                         stem = word;
00110                         found = wordExactlyExistsAs(stem, pos);
00111                 }
00112 
00113                 else if (pos.equals(RDF_TYPE_ADJECTIVE_SYNSET)) {
00114                         stem = word;
00115                         found = wordExactlyExistsAs(stem, pos);
00116                 }
00117 
00118                 else if (pos.equals(RDF_TYPE_NOUN_SYNSET)) {
00119                         stem = word;
00120                         found = wordExactlyExistsAs(stem, pos);
00121                         if (!found) {
00122                                 if (word.endsWith("s")) {
00123                                         stem = word.substring(0, word.length() - 1);
00124                                         found = wordExactlyExistsAs(stem, pos);
00125                                 }
00126                                 if (!found && word.endsWith("es")) {
00127                                         stem = word.substring(0, word.length() - 2);
00128                                         found = wordExactlyExistsAs(stem, pos);
00129                                 }
00130                                 if (!found) {
00131                                         if (irregularNouns.get(word) != null) {
00132                                                 stem = irregularNouns.get(word);
00133                                                 found = wordExactlyExistsAs(stem, pos);
00134                                         }
00135                                 }
00136                         }
00137                 }
00138 
00139                 if (found)
00140                         return stem;
00141                 else
00142                         return null;
00143         }
00144 
00157         public static boolean wordExistsAs(String word, String pos) {
00158 
00159                 boolean found = wordExactlyExistsAs(word, pos);
00160 
00161                 if (!found) {
00162 
00163                         String stem = getWordStem(word, pos);
00164 
00165                         found = wordExactlyExistsAs(stem, pos);
00166                 }
00167 
00168                 return found;
00169         }
00170 
00171         public static String convertPOS(int type) {
00172 
00173                 String pos = null;
00174 
00175                 if (type == Word.TYPE_VERB_INFINITIVE
00176                                 || type == Word.TYPE_PAST_PARTICIPLE
00177                                 || type == Word.TYPE_GERUND)
00178                         pos = RDF_TYPE_VERB_SYNSET;
00179                 else if (type == Word.TYPE_NOUN)
00180                         pos = RDF_TYPE_NOUN_SYNSET;
00181                 else if (type == Word.TYPE_ADV)
00182                         pos = RDF_TYPE_ADVERB_SYNSET;
00183                 else if (type == Word.TYPE_ADJ)
00184                         pos = RDF_TYPE_ADJECTIVE_SYNSET;
00185 
00186                 return pos;
00187         }
00188 
00200         public static boolean wordExactlyExistsAs(String word, String pos) {
00201 
00202                 if (word == null)
00203                         return false;
00204 
00205                 return getSynsets(word, pos).size() > 0;
00206         }
00207 
00208         @SuppressWarnings("unused")
00209         private static void cacheIrregularNouns() throws IOException,
00210                         WordNetException {
00211                 File noun_exc = new File(absoluteFilePath("etc/wordnet/noun.exc"));
00212                 if (!noun_exc.exists())
00213                         throw new FileNotFoundException(
00214                                         "Mapping File for irregular Noun forms ("
00215                                                         + noun_exc.getAbsolutePath() + ") not found");
00216                 else {
00217                         FileReader reader = new FileReader(noun_exc);
00218                         StringBuilder file = new StringBuilder();
00219                         char[] buffer = new char[1024];
00220                         int read = 0;
00221                         while ((read = reader.read(buffer)) >= 0)
00222                                 file.append(buffer, 0, read);
00223                         irregularNouns = new HashMap<String, String>();
00224 
00225                         String[] mappings = file.toString().split(
00226                                         System.getProperty("line.separator"));
00227                         for (int i = 0; i < mappings.length; i++) {
00228                                 String[] keyValueSet = mappings[i].split(" ");
00229                                 if (keyValueSet.length < 2)
00230                                         throw new WordNetException("Illegal Mapping entry in "
00231                                                         + noun_exc.getName());
00232                                 else {
00233                                         irregularNouns.put(keyValueSet[0], keyValueSet[1]);
00234                                 }
00235                         }
00236 
00237                 }
00238         }
00239 
00240         // public static void initializeMissingMappings() {
00241         //
00242         // missingNouns = new HashMap<String, String>();
00243         // missingNouns.put("stove burner", "04163507");
00244         // missingNouns.put("stove top", "TUM00001");
00245         // missingNouns.put("eating bowl", "02778691");
00246         // missingNouns.put("packet soup", "07115585");
00247         // missingNouns.put("contents", "07474342");
00248         // missingNouns.put("iced tea", "N07456631");
00249         //
00250         // missingVerbs = new HashMap<String, String>();
00251         // missingVerbs.put("look at", "TUM00002");
00252         // missingVerbs.put("bring out", "01145176");
00253         //
00254         // }
00255 
00256         public static String absoluteFilePath(String relativePath) {
00257                 return new File(relativePath).getAbsolutePath().replaceAll("\\\\", "/");
00258         }
00259 
00268         private static Map<String, Vector<Object>> executeQuery(String query,
00269                         String plFile) {
00270 
00271                 // System.err.println("Executing query: " + query);
00272 
00273                 HashMap<String, Vector<Object>> result = new HashMap<String, Vector<Object>>();
00274                 Hashtable[] solutions;
00275 
00276                 Query q = new Query("expand_goal((" + query + "),_9), call(_9)");
00277 
00278                 // Due to bugs we have to check for one answer beforehand.
00279                 if (!q.hasMoreSolutions())
00280                         return new HashMap<String, Vector<Object>>();
00281                 Hashtable oneSolution = q.nextSolution();
00282                 if (oneSolution.isEmpty()) // Due to a bug consulting a file without
00283                         // anything else results in shutdown
00284                         return new HashMap<String, Vector<Object>>(); // I will try to
00285                 // prevent it with
00286                 // this construction
00287 
00288                 // Restart the query and fetch everything.
00289                 q.rewind();
00290                 solutions = q.allSolutions();
00291 
00292                 for (Object key : solutions[0].keySet()) {
00293                         result.put(key.toString(), new Vector<Object>());
00294                 }
00295 
00296                 // Build the result
00297                 for (int i = 0; i < solutions.length; i++) {
00298                         Hashtable solution = solutions[i];
00299                         for (Object key : solution.keySet()) {
00300                                 String keyStr = key.toString();
00301 
00302                                 if (!result.containsKey(keyStr)) {
00303 
00304                                         // previously unknown column, add result vector
00305                                         Vector<Object> resultVector = new Vector<Object>();
00306                                         resultVector.add(i, solution.get(key).toString());
00307                                         result.put(keyStr, resultVector);
00308 
00309                                 }
00310                                 // Put the solution into the correct vector
00311                                 Vector<Object> resultVector = result.get(keyStr);
00312                                 resultVector.add(i, solution.get(key).toString());
00313                         }
00314                 }
00315                 // Generate the final QueryResult and return
00316                 return result;
00317         }
00318 
00329         public static ArrayList<String> getSynsets(String word, String pos) {
00330 
00331                 Map<String, Vector<Object>> results;
00332 
00333 //              if (pos == null)
00334                         results = executeQuery("comp_ehow:word_has_synset_uri('" + word.toLowerCase()
00335                                         + "', Synsets)", null);
00336 //              else
00337 //                      results = executeQuery("word_has_synset_of_type('"
00338 //                                      + word.toLowerCase() + "', Synsets, '" + pos + "')", null);
00339 
00340                 Vector<Object> synsets = results.get("Synsets");
00341 
00342                 if (synsets == null)
00343                         return new ArrayList<String>();
00344 
00345                 ArrayList<String> synsetArray = new ArrayList<String>();
00346 
00347                 for (int i = 0; i < synsets.size(); i++)
00348                         synsetArray.add(((String) synsets.get(i)).replaceAll("'", ""));
00349 
00350                 return synsetArray;
00351         }
00352 
00353         public static String getSynsetFromID(String synsetID) {
00354                 Map<String, Vector<Object>> results;
00355 
00356                 results = executeQuery("comp_ehow:synset_from_id(Synset,'" + synsetID + "')",
00357                                 null);
00358 
00359                 Vector<Object> synsets = results.get("Synset");
00360 
00361                 if (synsets == null || synsets.size() == 0)
00362                         return null;
00363                 else
00364                         return ((String)synsets.get(0)).replaceAll("'", "");
00365         }
00366 
00367         public static String readLine() {
00368                 try {
00369                         StringBuilder str = new StringBuilder();
00370                         int read;
00371                         read = System.in.read();
00372 
00373                         while ((char) read != '\n') {
00374                                 if (((char) read) != '\r' && (char) read != '\n')
00375                                         str.append((char) read);
00376 
00377                                 read = System.in.read();
00378                         }
00379                         return str.toString();
00380                 } catch (IOException e) {
00381                         e.printStackTrace();
00382                 }
00383                 return null;
00384         }
00385 
00386         public static void main(String[] args) {
00387                 //
00388                 // WordNetRDF wn = new WordNetRDF();
00389                 // Map<String, Vector<Object>> synsetIds = wn.executeQuery(
00390                 // "word_has_synset_id('cup', SynsetId)", null);
00391                 // for (String key : synsetIds.keySet()) {
00392                 // System.out.println(key + " -> " + synsetIds.get(key).toString());
00393                 // }
00394                 //
00395                 // String[] synsets = wn.getSynsets("cup", null);
00396                 // for (String synset : synsets)
00397                 // System.out.println(synset);
00398                 //
00399                 // Map<String, Vector<Object>> types = wn
00400                 // .executeQuery(
00401                 // "synset_type('http://www.w3.org/2006/03/wn/wn20/instances/synset-cup-noun-3', Type)",
00402                 // null);
00403                 // for (String key : types.keySet()) {
00404                 // System.out.println(key + " -> " + types.get(key).toString());
00405                 //
00406                 // }
00407                 
00408         //      System.out.println(getSynsetFromID("302351490"));
00409 
00410                 do {
00411                         String read = readLine();
00412                         if (read.equals("exit"))
00413                                 break;
00414                         if (wordExistsAs(read, RDF_TYPE_VERB_SYNSET)) {
00415                                 System.out.println(getSynsets(read, RDF_TYPE_VERB_SYNSET));
00416                         }
00417                 } while (true);
00418 
00419                 // try {
00420                 // if ( wordExistsAs( "chopped", Word.TYPE_PAST_PARTICIPLE ) )
00421                 // System.out.println( "Word found" );
00422                 // else
00423                 // System.out.println( "Word not found" );
00424                 // }
00425                 // catch ( MalformedURLException e ) {
00426                 // // TODO Auto-generated catch block
00427                 // e.printStackTrace();
00428                 // }
00429                 // catch ( Exception e ) {
00430                 // // TODO Auto-generated catch block
00431                 // e.printStackTrace();
00432                 // }
00433         }
00434 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Properties Friends


comp_ehow
Author(s): Moritz Tenorth, Daniel Nyga
autogenerated on Tue Apr 16 2013 00:18:03