$search
00001 package instruction.disambiguator; 00002 00003 import java.io.File; 00004 import java.io.FileReader; 00005 import java.io.FileWriter; 00006 import java.io.IOException; 00007 import java.util.ArrayList; 00008 import java.util.Collections; 00009 import java.util.Comparator; 00010 import java.util.HashMap; 00011 import java.util.Iterator; 00012 import java.util.List; 00013 import java.util.Set; 00014 import org.jdom.Document; 00015 import org.jdom.Element; 00016 import org.jdom.JDOMException; 00017 import org.jdom.input.SAXBuilder; 00018 import org.jdom.output.XMLOutputter; 00019 00020 public class DisambiguatorKB { 00021 00022 // ================================================================== 00023 // XML tags and attributes 00024 // ================================================================== 00025 public static final String TAG_DISAMBIGUATOR = "disambiguator"; 00026 public static final String TAG_PREPOSITIONS = "prepositions"; 00027 public static final String TAG_INSTRUCTIONS = "instructions"; 00028 public static final String TAG_PREPOSITION = "preposition"; 00029 public static final String TAG_ACTION = "action"; 00030 public static final String TAG_OBJECTS = "objects"; 00031 public static final String TAG_OBJECT = "object"; 00032 public static final String TAG_DEPENDENCY = "dependency"; 00033 public static final String ATTR_COUNT = "count"; 00034 public static final String ATTR_NAME = "name"; 00035 public static final String ATTR_EVIDENCE = "evidence"; 00036 00037 private int instructionCount = 0; 00038 private int prepositionCount = 0; 00039 private int objectCount = 0; 00040 private String fileName = null; 00041 00042 private HashMap<String, Preposition> prepositions = new HashMap<String, Preposition>(); 00043 private HashMap<String, Action> actions = new HashMap<String, Action>(); 00044 private HashMap<String, InstObject> objects = new HashMap<String, InstObject>(); 00045 00046 public DisambiguatorKB () { 00047 00048 } 00049 00055 public void tellPreposition( String p ) { 00056 00057 Preposition pp = prepositions.get( p ); 00058 prepositionCount++; 00059 00060 if ( pp == null ) { 00061 pp = new Preposition( p, 0 ); 00062 prepositions.put( p, pp ); 00063 } 00064 00065 pp.count++; 00066 } 00067 00075 public void tellObject( String object, String evidence ) { 00076 00077 InstObject o = objects.get( object ); 00078 00079 ArrayList<String> ev = new ArrayList<String>(); 00080 ev.add( evidence ); 00081 Condition c = new Condition( ev, 1 ); 00082 00083 objectCount++; 00084 00085 if ( o != null ) { 00086 for ( int i = 0; i < o.conditions.size(); i++ ) { 00087 if ( o.conditions.get( i ).equals( c ) ) { 00088 o.conditions.get( i ).count++; 00089 return; 00090 } 00091 } 00092 o.conditions.add( c ); 00093 } 00094 else { 00095 o = new InstObject( object ); 00096 o.conditions.add( c ); 00097 objects.put( object, o ); 00098 } 00099 00100 } 00101 00109 public void tellAction( String action, List<String> evidence ) { 00110 00111 Action a = actions.get( action ); 00112 00113 Condition c = new Condition( evidence, 1 ); 00114 00115 instructionCount++; 00116 00117 if ( a != null ) { 00118 a.count++; 00119 for ( int i = 0; i < a.conditions.size(); i++ ) { 00120 if ( a.conditions.get( i ).equals( c ) ) { 00121 a.conditions.get( i ).count++; 00122 return; 00123 } 00124 } 00125 a.conditions.add( c ); 00126 } 00127 else { 00128 a = new Action( action, 1 ); 00129 a.conditions.add( c ); 00130 actions.put( action, a ); 00131 } 00132 } 00133 00140 public List<String> askActions( List<String> evidence ) { 00141 00142 ArrayList<String> sortedKeys = new ArrayList<String>( actions.keySet() ); 00143 00144 ProbabilityComparator comparator = new ProbabilityComparator( evidence, Action.class ); 00145 00146 Collections.sort( sortedKeys, comparator ); 00147 00148 ArrayList<String> sortedActions = new ArrayList<String>(); 00149 for ( int i = 0; i < sortedKeys.size(); i++ ) 00150 sortedActions.add( actions.get( sortedKeys.get( i ) ).name ); 00151 00152 return sortedActions; 00153 } 00154 00161 public List<String> askObjects( String evidence ) { 00162 00163 ArrayList<String> sortedKeys = new ArrayList<String>( objects.keySet() ); 00164 00165 ArrayList<String> e = new ArrayList<String>(); 00166 e.add( evidence ); 00167 ProbabilityComparator comparator = new ProbabilityComparator( e, InstObject.class ); 00168 00169 Collections.sort( sortedKeys, comparator ); 00170 ArrayList<String> sortedObjects = new ArrayList<String>(); 00171 for ( int i = 0; i < sortedKeys.size(); i++ ) 00172 sortedObjects.add( objects.get( sortedKeys.get( i ) ).name ); 00173 00174 return sortedObjects; 00175 } 00176 00182 private List<Preposition> getPP() { 00183 00184 ArrayList<Preposition> pp = new ArrayList<Preposition>(); 00185 Set<String> keySet = prepositions.keySet(); 00186 for ( Iterator<String> i = keySet.iterator(); i.hasNext(); ) 00187 pp.add( prepositions.get( i.next() ) ); 00188 00189 return pp; 00190 } 00191 00200 private double getProbabilityOfAction( String action, List<String> evidence ) { 00201 00202 Action a = actions.get( action ); 00203 Condition c = new Condition( evidence, - 1 ); 00204 if ( a == null ) 00205 return 0; 00206 00207 int idx = a.conditions.indexOf( c ); 00208 if ( idx < 0 ) 00209 return 0; 00210 else { 00211 Condition cond = a.conditions.get( idx ); 00212 00213 // P(Action ^ Condition) 00214 double p_aAndp = ( (double) cond.count ) / ( (double) instructionCount ); 00215 00216 // P(on) * P(from) * ... 00217 double pp = 1.0; 00218 00219 for ( int i = 0; i < getPP().size(); i++ ) { 00220 double p = ( (double) getPP().get( i ).count ) / ( (double) instructionCount ); 00221 00222 if ( cond.evidence.contains( getPP().get( i ).name ) ) 00223 pp *= p; 00224 else 00225 continue;//pp *= ( 1.0 - p ); 00226 } 00227 00228 return p_aAndp / pp; 00229 } 00230 } 00231 00240 private double getProbabilityOfObject( String object, List<String> evidence ) { 00241 00242 InstObject o = objects.get( object ); 00243 Condition c = new Condition( evidence, - 1 ); 00244 00245 if ( o == null ) 00246 return 0.; 00247 00248 int idx = o.conditions.indexOf( c ); 00249 if ( idx < 0 ) 00250 return 0; 00251 else { 00252 Condition cond = o.conditions.get( idx ); 00253 Action a = actions.get( evidence.get( 0 ) ); 00254 double p_a = (double) a.count / (double) instructionCount; 00255 double p_oAnda = ( (double) cond.count / (double) instructionCount ); 00256 00257 return p_oAnda / p_a; 00258 } 00259 } 00260 00268 @SuppressWarnings("unchecked") 00269 public void load( String fileName ) throws JDOMException, IOException { 00270 00271 // clear all 00272 prepositions.clear(); 00273 actions.clear(); 00274 objects.clear(); 00275 00276 this.fileName = fileName; 00277 00278 File file = new File( fileName ); 00279 00280 if ( ! file.exists() ) 00281 return; 00282 00283 FileReader reader = new FileReader( file ); 00284 00285 SAXBuilder sax = new SAXBuilder(); 00286 Document doc = sax.build( reader ); 00287 00288 Element disamb = doc.getRootElement(); 00289 00290 List<Element> elements = disamb.getChildren(); 00291 00292 for ( Iterator<Element> i = elements.iterator(); i.hasNext(); ) { 00293 Element el = i.next(); 00294 00295 // ================================================================== 00296 // Preposition-Count 00297 // ================================================================== 00298 if ( el.getName().equalsIgnoreCase( TAG_PREPOSITIONS ) ) 00299 prepositionCount = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) ); 00300 00301 // ================================================================== 00302 // Instructions-Count 00303 // ================================================================== 00304 else if ( el.getName().equalsIgnoreCase( TAG_INSTRUCTIONS ) ) 00305 instructionCount = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) ); 00306 00307 // ================================================================== 00308 // Read all elements 00309 // ================================================================== 00310 else if ( el.getName().equalsIgnoreCase( TAG_OBJECTS ) ) 00311 objectCount = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) ); 00312 00313 // ================================================================== 00314 // Preposition 00315 // ================================================================== 00316 else if ( el.getName().equalsIgnoreCase( TAG_PREPOSITION ) ) { 00317 String name = el.getAttributeValue( ATTR_NAME ); 00318 int count = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) ); 00319 prepositions.put( name, new Preposition( name, count ) ); 00320 } 00321 00322 // ================================================================== 00323 // Action 00324 // ================================================================== 00325 else if ( el.getName().equalsIgnoreCase( TAG_ACTION ) ) { 00326 String name = el.getAttributeValue( ATTR_NAME ); 00327 int count = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) ); 00328 00329 Action a = new Action( name, count ); 00330 00331 // ================================================================== 00332 // Dependencies 00333 // ================================================================== 00334 List<Element> cond = el.getChildren(); 00335 for ( Iterator<Element> j = cond.iterator(); j.hasNext(); ) { 00336 Element e = j.next(); 00337 String evidences = e.getAttributeValue( ATTR_EVIDENCE ); 00338 int countEvid = Integer.valueOf( e.getAttributeValue( ATTR_COUNT ) ); 00339 00340 String[] evid = evidences.split( ";" ); 00341 List<String> ev = new ArrayList<String>(); 00342 for ( int k = 0; k < evid.length; k++ ) { 00343 if ( ! evid[k].isEmpty() ) 00344 ev.add( evid[k] ); 00345 } 00346 00347 a.conditions.add( new Condition( ev, countEvid ) ); 00348 } 00349 actions.put( name, a ); 00350 } 00351 00352 // ================================================================== 00353 // Object 00354 // ================================================================== 00355 else if ( el.getName().equalsIgnoreCase( TAG_OBJECT ) ) { 00356 00357 String name = el.getAttributeValue( ATTR_NAME ); 00358 00359 InstObject o = new InstObject( name ); 00360 00361 // ================================================================== 00362 // Dependencies 00363 // ================================================================== 00364 List<Element> cond = el.getChildren(); 00365 for ( Iterator<Element> j = cond.iterator(); j.hasNext(); ) { 00366 Element e = j.next(); 00367 String evidences = e.getAttributeValue( ATTR_EVIDENCE ); 00368 int countEvid = Integer.valueOf( e.getAttributeValue( ATTR_COUNT ) ); 00369 00370 String[] evid = evidences.split( ";" ); 00371 List<String> ev = new ArrayList<String>(); 00372 for ( int k = 0; k < evid.length; k++ ) 00373 ev.add( evid[k] ); 00374 00375 o.conditions.add( new Condition( ev, countEvid ) ); 00376 } 00377 objects.put( name, o ); 00378 } 00379 } 00380 } 00381 00387 public void save() throws IOException { 00388 00389 if ( fileName != null ) 00390 saveAs( fileName ); 00391 } 00392 00399 public void saveAs( String fileName ) throws IOException { 00400 00401 this.fileName = fileName; 00402 File file = new File( fileName ); 00403 if ( ! file.exists() ) 00404 file.createNewFile(); 00405 00406 Element disamb = new Element( TAG_DISAMBIGUATOR ); 00407 00408 // ================================================================== 00409 // Instruction-Count 00410 // ================================================================== 00411 Element instructions = new Element( TAG_INSTRUCTIONS ); 00412 instructions.setAttribute( ATTR_COUNT, String.valueOf( instructionCount ) ); 00413 disamb.addContent( instructions ); 00414 00415 // ================================================================== 00416 // Objects-Count 00417 // ================================================================== 00418 Element obj = new Element( TAG_OBJECTS ); 00419 obj.setAttribute( ATTR_COUNT, String.valueOf( objectCount ) ); 00420 disamb.addContent( obj ); 00421 00422 // ================================================================== 00423 // Preposition-Count 00424 // ================================================================== 00425 Element preposition_count = new Element( TAG_PREPOSITIONS ); 00426 preposition_count.setAttribute( ATTR_COUNT, String.valueOf( prepositionCount ) ); 00427 disamb.addContent( preposition_count ); 00428 00429 // ================================================================== 00430 // Prepositions 00431 // ================================================================== 00432 Set<String> keys = prepositions.keySet(); 00433 for ( Iterator<String> i = keys.iterator(); i.hasNext(); ) { 00434 Preposition pp = prepositions.get( i.next() ); 00435 00436 Element p = new Element( TAG_PREPOSITION ); 00437 p.setAttribute( ATTR_NAME, pp.name ); 00438 p.setAttribute( ATTR_COUNT, String.valueOf( pp.count ) ); 00439 disamb.addContent( p ); 00440 } 00441 00442 // ================================================================== 00443 // Actions 00444 // ================================================================== 00445 keys = actions.keySet(); 00446 for ( Iterator<String> i = keys.iterator(); i.hasNext(); ) { 00447 Action a = actions.get( i.next() ); 00448 Element e = new Element( TAG_ACTION ); 00449 e.setAttribute( ATTR_NAME, a.name ); 00450 e.setAttribute( ATTR_COUNT, String.valueOf( a.count ) ); 00451 00452 for ( int j = 0; j < a.conditions.size(); j++ ) { 00453 Element dependency = new Element( TAG_DEPENDENCY ); 00454 dependency.setAttribute( ATTR_COUNT, String.valueOf( a.conditions.get( j ).count ) ); 00455 dependency.setAttribute( ATTR_EVIDENCE, toArrayString( a.conditions.get( j ).evidence ) ); 00456 e.addContent( dependency ); 00457 } 00458 disamb.addContent( e ); 00459 } 00460 00461 // ================================================================== 00462 // Objects 00463 // ================================================================== 00464 keys = objects.keySet(); 00465 for ( Iterator<String> i = keys.iterator(); i.hasNext(); ) { 00466 InstObject o = objects.get( i.next() ); 00467 Element e = new Element( TAG_OBJECT ); 00468 e.setAttribute( ATTR_NAME, o.name ); 00469 00470 for ( int j = 0; j < o.conditions.size(); j++ ) { 00471 Element dependency = new Element( TAG_DEPENDENCY ); 00472 dependency.setAttribute( ATTR_COUNT, String.valueOf( o.conditions.get( j ).count ) ); 00473 dependency.setAttribute( ATTR_EVIDENCE, toArrayString( o.conditions.get( j ).evidence ) ); 00474 e.addContent( dependency ); 00475 } 00476 disamb.addContent( e ); 00477 } 00478 00479 // ================================================================== 00480 // Create XML document and write it to file 00481 // ================================================================== 00482 Document doc = new Document( disamb ); 00483 00484 XMLOutputter outPutter = new XMLOutputter( " ", true ); 00485 FileWriter outStream = new FileWriter( file ); 00486 00487 outPutter.output( doc, outStream ); 00488 } 00489 00490 public String toArrayString( List<String> list ) { 00491 00492 String str = ""; 00493 for ( int i = 0; i < list.size(); i++ ) { 00494 str += list.get( i ); 00495 if ( i < list.size() - 1 ) 00496 str += ";"; 00497 } 00498 00499 return str; 00500 } 00501 00502 // ================================================================== 00503 // Inner classes for Knowledge representation 00504 // ================================================================== 00505 00509 private class Preposition { 00510 public String name; 00511 public int count; 00512 00513 public Preposition ( String name, int count ) { 00514 00515 this.name = name; 00516 this.count = count; 00517 } 00518 } 00519 00526 private class Condition { 00527 public List<String> evidence; 00528 public int count; 00529 00530 public Condition ( List<String> evidence, int count ) { 00531 00532 this.evidence = evidence; 00533 this.count = count; 00534 } 00535 00536 public boolean equals( Object o ) { 00537 00538 if ( ! ( o instanceof Condition ) ) 00539 return false; 00540 00541 Condition c = (Condition) o; 00542 if ( c.evidence.isEmpty() && evidence.isEmpty() ) 00543 return true; 00544 else if ( c.evidence.containsAll( evidence ) && evidence.containsAll( c.evidence ) ) 00545 return true; 00546 else 00547 return false; 00548 } 00549 } 00550 00558 private class Action { 00559 public String name; 00560 public int count; 00561 public List<Condition> conditions = new ArrayList<Condition>(); 00562 00563 public Action ( String name, int count ) { 00564 00565 this.name = name; 00566 this.count = count; 00567 } 00568 } 00569 00576 private class InstObject { 00577 public String name; 00578 public ArrayList<Condition> conditions = new ArrayList<Condition>(); 00579 00580 public InstObject ( String name ) { 00581 00582 this.name = name; 00583 } 00584 } 00585 00592 private class ProbabilityComparator implements Comparator<String> { 00593 00594 List<String> evidence; 00595 00596 Class type; 00597 00598 public ProbabilityComparator ( List<String> evidence, Class c ) { 00599 00600 this.evidence = evidence; 00601 this.type = c; 00602 } 00603 00604 public int compare( String key1, String key2 ) { 00605 00606 double p1, p2; 00607 00608 if ( type == Action.class ) { 00609 Action a1 = actions.get( key1 ); 00610 Action a2 = actions.get( key2 ); 00611 00612 p1 = getProbabilityOfAction( a1.name, evidence ); 00613 p2 = getProbabilityOfAction( a2.name, evidence ); 00614 } 00615 else if ( type == InstObject.class ) { 00616 InstObject o1 = objects.get( key1 ); 00617 InstObject o2 = objects.get( key2 ); 00618 00619 p1 = getProbabilityOfObject( o1.name, evidence ); 00620 p2 = getProbabilityOfObject( o2.name, evidence ); 00621 } 00622 else 00623 return 0; 00624 00625 if ( p1 < p2 ) 00626 return 1; 00627 00628 else if ( p1 > p2 ) 00629 return - 1; 00630 00631 return 0; 00632 } 00633 00634 } 00635 }