00001 package instruction.disambiguator;
00002
00003 import java.io.File;
00004 import java.io.FileReader;
00005 import java.io.FileWriter;
00006 import java.io.IOException;
00007 import java.util.ArrayList;
00008 import java.util.Collections;
00009 import java.util.Comparator;
00010 import java.util.HashMap;
00011 import java.util.Iterator;
00012 import java.util.List;
00013 import java.util.Set;
00014 import org.jdom.Document;
00015 import org.jdom.Element;
00016 import org.jdom.JDOMException;
00017 import org.jdom.input.SAXBuilder;
00018 import org.jdom.output.XMLOutputter;
00019
00020 public class DisambiguatorKB {
00021
00022
00023
00024
00025 public static final String TAG_DISAMBIGUATOR = "disambiguator";
00026 public static final String TAG_PREPOSITIONS = "prepositions";
00027 public static final String TAG_INSTRUCTIONS = "instructions";
00028 public static final String TAG_PREPOSITION = "preposition";
00029 public static final String TAG_ACTION = "action";
00030 public static final String TAG_OBJECTS = "objects";
00031 public static final String TAG_OBJECT = "object";
00032 public static final String TAG_DEPENDENCY = "dependency";
00033 public static final String ATTR_COUNT = "count";
00034 public static final String ATTR_NAME = "name";
00035 public static final String ATTR_EVIDENCE = "evidence";
00036
00037 private int instructionCount = 0;
00038 private int prepositionCount = 0;
00039 private int objectCount = 0;
00040 private String fileName = null;
00041
00042 private HashMap<String, Preposition> prepositions = new HashMap<String, Preposition>();
00043 private HashMap<String, Action> actions = new HashMap<String, Action>();
00044 private HashMap<String, InstObject> objects = new HashMap<String, InstObject>();
00045
00046 public DisambiguatorKB () {
00047
00048 }
00049
00055 public void tellPreposition( String p ) {
00056
00057 Preposition pp = prepositions.get( p );
00058 prepositionCount++;
00059
00060 if ( pp == null ) {
00061 pp = new Preposition( p, 0 );
00062 prepositions.put( p, pp );
00063 }
00064
00065 pp.count++;
00066 }
00067
00075 public void tellObject( String object, String evidence ) {
00076
00077 InstObject o = objects.get( object );
00078
00079 ArrayList<String> ev = new ArrayList<String>();
00080 ev.add( evidence );
00081 Condition c = new Condition( ev, 1 );
00082
00083 objectCount++;
00084
00085 if ( o != null ) {
00086 for ( int i = 0; i < o.conditions.size(); i++ ) {
00087 if ( o.conditions.get( i ).equals( c ) ) {
00088 o.conditions.get( i ).count++;
00089 return;
00090 }
00091 }
00092 o.conditions.add( c );
00093 }
00094 else {
00095 o = new InstObject( object );
00096 o.conditions.add( c );
00097 objects.put( object, o );
00098 }
00099
00100 }
00101
00109 public void tellAction( String action, List<String> evidence ) {
00110
00111 Action a = actions.get( action );
00112
00113 Condition c = new Condition( evidence, 1 );
00114
00115 instructionCount++;
00116
00117 if ( a != null ) {
00118 a.count++;
00119 for ( int i = 0; i < a.conditions.size(); i++ ) {
00120 if ( a.conditions.get( i ).equals( c ) ) {
00121 a.conditions.get( i ).count++;
00122 return;
00123 }
00124 }
00125 a.conditions.add( c );
00126 }
00127 else {
00128 a = new Action( action, 1 );
00129 a.conditions.add( c );
00130 actions.put( action, a );
00131 }
00132 }
00133
00140 public List<String> askActions( List<String> evidence ) {
00141
00142 ArrayList<String> sortedKeys = new ArrayList<String>( actions.keySet() );
00143
00144 ProbabilityComparator comparator = new ProbabilityComparator( evidence, Action.class );
00145
00146 Collections.sort( sortedKeys, comparator );
00147
00148 ArrayList<String> sortedActions = new ArrayList<String>();
00149 for ( int i = 0; i < sortedKeys.size(); i++ )
00150 sortedActions.add( actions.get( sortedKeys.get( i ) ).name );
00151
00152 return sortedActions;
00153 }
00154
00161 public List<String> askObjects( String evidence ) {
00162
00163 ArrayList<String> sortedKeys = new ArrayList<String>( objects.keySet() );
00164
00165 ArrayList<String> e = new ArrayList<String>();
00166 e.add( evidence );
00167 ProbabilityComparator comparator = new ProbabilityComparator( e, InstObject.class );
00168
00169 Collections.sort( sortedKeys, comparator );
00170 ArrayList<String> sortedObjects = new ArrayList<String>();
00171 for ( int i = 0; i < sortedKeys.size(); i++ )
00172 sortedObjects.add( objects.get( sortedKeys.get( i ) ).name );
00173
00174 return sortedObjects;
00175 }
00176
00182 private List<Preposition> getPP() {
00183
00184 ArrayList<Preposition> pp = new ArrayList<Preposition>();
00185 Set<String> keySet = prepositions.keySet();
00186 for ( Iterator<String> i = keySet.iterator(); i.hasNext(); )
00187 pp.add( prepositions.get( i.next() ) );
00188
00189 return pp;
00190 }
00191
00200 private double getProbabilityOfAction( String action, List<String> evidence ) {
00201
00202 Action a = actions.get( action );
00203 Condition c = new Condition( evidence, - 1 );
00204 if ( a == null )
00205 return 0;
00206
00207 int idx = a.conditions.indexOf( c );
00208 if ( idx < 0 )
00209 return 0;
00210 else {
00211 Condition cond = a.conditions.get( idx );
00212
00213
00214 double p_aAndp = ( (double) cond.count ) / ( (double) instructionCount );
00215
00216
00217 double pp = 1.0;
00218
00219 for ( int i = 0; i < getPP().size(); i++ ) {
00220 double p = ( (double) getPP().get( i ).count ) / ( (double) instructionCount );
00221
00222 if ( cond.evidence.contains( getPP().get( i ).name ) )
00223 pp *= p;
00224 else
00225 continue;
00226 }
00227
00228 return p_aAndp / pp;
00229 }
00230 }
00231
00240 private double getProbabilityOfObject( String object, List<String> evidence ) {
00241
00242 InstObject o = objects.get( object );
00243 Condition c = new Condition( evidence, - 1 );
00244
00245 if ( o == null )
00246 return 0.;
00247
00248 int idx = o.conditions.indexOf( c );
00249 if ( idx < 0 )
00250 return 0;
00251 else {
00252 Condition cond = o.conditions.get( idx );
00253 Action a = actions.get( evidence.get( 0 ) );
00254 double p_a = (double) a.count / (double) instructionCount;
00255 double p_oAnda = ( (double) cond.count / (double) instructionCount );
00256
00257 return p_oAnda / p_a;
00258 }
00259 }
00260
00268 @SuppressWarnings("unchecked")
00269 public void load( String fileName ) throws JDOMException, IOException {
00270
00271
00272 prepositions.clear();
00273 actions.clear();
00274 objects.clear();
00275
00276 this.fileName = fileName;
00277
00278 File file = new File( fileName );
00279
00280 if ( ! file.exists() )
00281 return;
00282
00283 FileReader reader = new FileReader( file );
00284
00285 SAXBuilder sax = new SAXBuilder();
00286 Document doc = sax.build( reader );
00287
00288 Element disamb = doc.getRootElement();
00289
00290 List<Element> elements = disamb.getChildren();
00291
00292 for ( Iterator<Element> i = elements.iterator(); i.hasNext(); ) {
00293 Element el = i.next();
00294
00295
00296
00297
00298 if ( el.getName().equalsIgnoreCase( TAG_PREPOSITIONS ) )
00299 prepositionCount = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) );
00300
00301
00302
00303
00304 else if ( el.getName().equalsIgnoreCase( TAG_INSTRUCTIONS ) )
00305 instructionCount = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) );
00306
00307
00308
00309
00310 else if ( el.getName().equalsIgnoreCase( TAG_OBJECTS ) )
00311 objectCount = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) );
00312
00313
00314
00315
00316 else if ( el.getName().equalsIgnoreCase( TAG_PREPOSITION ) ) {
00317 String name = el.getAttributeValue( ATTR_NAME );
00318 int count = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) );
00319 prepositions.put( name, new Preposition( name, count ) );
00320 }
00321
00322
00323
00324
00325 else if ( el.getName().equalsIgnoreCase( TAG_ACTION ) ) {
00326 String name = el.getAttributeValue( ATTR_NAME );
00327 int count = Integer.valueOf( el.getAttributeValue( ATTR_COUNT ) );
00328
00329 Action a = new Action( name, count );
00330
00331
00332
00333
00334 List<Element> cond = el.getChildren();
00335 for ( Iterator<Element> j = cond.iterator(); j.hasNext(); ) {
00336 Element e = j.next();
00337 String evidences = e.getAttributeValue( ATTR_EVIDENCE );
00338 int countEvid = Integer.valueOf( e.getAttributeValue( ATTR_COUNT ) );
00339
00340 String[] evid = evidences.split( ";" );
00341 List<String> ev = new ArrayList<String>();
00342 for ( int k = 0; k < evid.length; k++ ) {
00343 if ( ! evid[k].isEmpty() )
00344 ev.add( evid[k] );
00345 }
00346
00347 a.conditions.add( new Condition( ev, countEvid ) );
00348 }
00349 actions.put( name, a );
00350 }
00351
00352
00353
00354
00355 else if ( el.getName().equalsIgnoreCase( TAG_OBJECT ) ) {
00356
00357 String name = el.getAttributeValue( ATTR_NAME );
00358
00359 InstObject o = new InstObject( name );
00360
00361
00362
00363
00364 List<Element> cond = el.getChildren();
00365 for ( Iterator<Element> j = cond.iterator(); j.hasNext(); ) {
00366 Element e = j.next();
00367 String evidences = e.getAttributeValue( ATTR_EVIDENCE );
00368 int countEvid = Integer.valueOf( e.getAttributeValue( ATTR_COUNT ) );
00369
00370 String[] evid = evidences.split( ";" );
00371 List<String> ev = new ArrayList<String>();
00372 for ( int k = 0; k < evid.length; k++ )
00373 ev.add( evid[k] );
00374
00375 o.conditions.add( new Condition( ev, countEvid ) );
00376 }
00377 objects.put( name, o );
00378 }
00379 }
00380 }
00381
00387 public void save() throws IOException {
00388
00389 if ( fileName != null )
00390 saveAs( fileName );
00391 }
00392
00399 public void saveAs( String fileName ) throws IOException {
00400
00401 this.fileName = fileName;
00402 File file = new File( fileName );
00403 if ( ! file.exists() )
00404 file.createNewFile();
00405
00406 Element disamb = new Element( TAG_DISAMBIGUATOR );
00407
00408
00409
00410
00411 Element instructions = new Element( TAG_INSTRUCTIONS );
00412 instructions.setAttribute( ATTR_COUNT, String.valueOf( instructionCount ) );
00413 disamb.addContent( instructions );
00414
00415
00416
00417
00418 Element obj = new Element( TAG_OBJECTS );
00419 obj.setAttribute( ATTR_COUNT, String.valueOf( objectCount ) );
00420 disamb.addContent( obj );
00421
00422
00423
00424
00425 Element preposition_count = new Element( TAG_PREPOSITIONS );
00426 preposition_count.setAttribute( ATTR_COUNT, String.valueOf( prepositionCount ) );
00427 disamb.addContent( preposition_count );
00428
00429
00430
00431
00432 Set<String> keys = prepositions.keySet();
00433 for ( Iterator<String> i = keys.iterator(); i.hasNext(); ) {
00434 Preposition pp = prepositions.get( i.next() );
00435
00436 Element p = new Element( TAG_PREPOSITION );
00437 p.setAttribute( ATTR_NAME, pp.name );
00438 p.setAttribute( ATTR_COUNT, String.valueOf( pp.count ) );
00439 disamb.addContent( p );
00440 }
00441
00442
00443
00444
00445 keys = actions.keySet();
00446 for ( Iterator<String> i = keys.iterator(); i.hasNext(); ) {
00447 Action a = actions.get( i.next() );
00448 Element e = new Element( TAG_ACTION );
00449 e.setAttribute( ATTR_NAME, a.name );
00450 e.setAttribute( ATTR_COUNT, String.valueOf( a.count ) );
00451
00452 for ( int j = 0; j < a.conditions.size(); j++ ) {
00453 Element dependency = new Element( TAG_DEPENDENCY );
00454 dependency.setAttribute( ATTR_COUNT, String.valueOf( a.conditions.get( j ).count ) );
00455 dependency.setAttribute( ATTR_EVIDENCE, toArrayString( a.conditions.get( j ).evidence ) );
00456 e.addContent( dependency );
00457 }
00458 disamb.addContent( e );
00459 }
00460
00461
00462
00463
00464 keys = objects.keySet();
00465 for ( Iterator<String> i = keys.iterator(); i.hasNext(); ) {
00466 InstObject o = objects.get( i.next() );
00467 Element e = new Element( TAG_OBJECT );
00468 e.setAttribute( ATTR_NAME, o.name );
00469
00470 for ( int j = 0; j < o.conditions.size(); j++ ) {
00471 Element dependency = new Element( TAG_DEPENDENCY );
00472 dependency.setAttribute( ATTR_COUNT, String.valueOf( o.conditions.get( j ).count ) );
00473 dependency.setAttribute( ATTR_EVIDENCE, toArrayString( o.conditions.get( j ).evidence ) );
00474 e.addContent( dependency );
00475 }
00476 disamb.addContent( e );
00477 }
00478
00479
00480
00481
00482 Document doc = new Document( disamb );
00483
00484 XMLOutputter outPutter = new XMLOutputter( " ", true );
00485 FileWriter outStream = new FileWriter( file );
00486
00487 outPutter.output( doc, outStream );
00488 }
00489
00490 public String toArrayString( List<String> list ) {
00491
00492 String str = "";
00493 for ( int i = 0; i < list.size(); i++ ) {
00494 str += list.get( i );
00495 if ( i < list.size() - 1 )
00496 str += ";";
00497 }
00498
00499 return str;
00500 }
00501
00502
00503
00504
00505
00509 private class Preposition {
00510 public String name;
00511 public int count;
00512
00513 public Preposition ( String name, int count ) {
00514
00515 this.name = name;
00516 this.count = count;
00517 }
00518 }
00519
00526 private class Condition {
00527 public List<String> evidence;
00528 public int count;
00529
00530 public Condition ( List<String> evidence, int count ) {
00531
00532 this.evidence = evidence;
00533 this.count = count;
00534 }
00535
00536 public boolean equals( Object o ) {
00537
00538 if ( ! ( o instanceof Condition ) )
00539 return false;
00540
00541 Condition c = (Condition) o;
00542 if ( c.evidence.isEmpty() && evidence.isEmpty() )
00543 return true;
00544 else if ( c.evidence.containsAll( evidence ) && evidence.containsAll( c.evidence ) )
00545 return true;
00546 else
00547 return false;
00548 }
00549 }
00550
00558 private class Action {
00559 public String name;
00560 public int count;
00561 public List<Condition> conditions = new ArrayList<Condition>();
00562
00563 public Action ( String name, int count ) {
00564
00565 this.name = name;
00566 this.count = count;
00567 }
00568 }
00569
00576 private class InstObject {
00577 public String name;
00578 public ArrayList<Condition> conditions = new ArrayList<Condition>();
00579
00580 public InstObject ( String name ) {
00581
00582 this.name = name;
00583 }
00584 }
00585
00592 private class ProbabilityComparator implements Comparator<String> {
00593
00594 List<String> evidence;
00595
00596 Class type;
00597
00598 public ProbabilityComparator ( List<String> evidence, Class c ) {
00599
00600 this.evidence = evidence;
00601 this.type = c;
00602 }
00603
00604 public int compare( String key1, String key2 ) {
00605
00606 double p1, p2;
00607
00608 if ( type == Action.class ) {
00609 Action a1 = actions.get( key1 );
00610 Action a2 = actions.get( key2 );
00611
00612 p1 = getProbabilityOfAction( a1.name, evidence );
00613 p2 = getProbabilityOfAction( a2.name, evidence );
00614 }
00615 else if ( type == InstObject.class ) {
00616 InstObject o1 = objects.get( key1 );
00617 InstObject o2 = objects.get( key2 );
00618
00619 p1 = getProbabilityOfObject( o1.name, evidence );
00620 p2 = getProbabilityOfObject( o2.name, evidence );
00621 }
00622 else
00623 return 0;
00624
00625 if ( p1 < p2 )
00626 return 1;
00627
00628 else if ( p1 > p2 )
00629 return - 1;
00630
00631 return 0;
00632 }
00633
00634 }
00635 }