00001 package edu.tum.cs.srl;
00002
00003 import java.io.BufferedReader;
00004 import java.io.PrintStream;
00005 import java.io.StringReader;
00006 import java.util.ArrayList;
00007 import java.util.Collection;
00008 import java.util.HashMap;
00009 import java.util.HashSet;
00010 import java.util.Map.Entry;
00011 import java.util.regex.Matcher;
00012 import java.util.regex.Pattern;
00013
00014 import edu.tum.cs.inference.IParameterHandler;
00015 import edu.tum.cs.inference.ParameterHandler;
00016 import edu.tum.cs.prolog.PrologKnowledgeBase;
00017 import edu.tum.cs.srl.taxonomy.Concept;
00018 import edu.tum.cs.srl.taxonomy.Taxonomy;
00019 import edu.tum.cs.util.FileUtil;
00020 import edu.tum.cs.util.StringTool;
00021 import edu.tum.cs.util.datastruct.MultiIterator;
00022
/**
 * Evidence database for a {@link RelationalModel}: stores variable
 * assignments, the constants known for each type (domains) and the
 * functional dependencies induced by the model's relation keys.
 * Values of logical predicates are obtained from a Prolog knowledge base
 * when the model defines Prolog rules.
 */
00028 public class Database implements IParameterHandler {
00029
/** Stored variables, indexed by their lower-cased key string. */
00033 protected HashMap<String, Variable> entries;
/**
 * For each relation key: maps the comma-joined values of the key
 * parameters to the full parameter array of the true atom stored for them.
 */
00034 protected HashMap<RelationKey, HashMap<String, String[]>> functionalDependencies;
/** Maps a type name to the set of constants recorded for that type. */
00035 protected HashMap<String, HashSet<String>> domains;
/** The model this database provides evidence for. */
00036 public RelationalModel model;
/** Prolog knowledge base; only created when the model has Prolog rules, otherwise null. */
00037 protected PrologKnowledgeBase prolog;
/** Set once getEntries() has computed the values of all groundings of logical predicates. */
00038 protected Boolean prologDatabaseExtended = false;
00039
00040
00041
/** The model's taxonomy; may be null. */
00042 protected Taxonomy taxonomy;
/** Maps a constant to the type it is currently filed under; only allocated when a taxonomy is present. */
00043 protected HashMap<String, String> entity2type;
/** Cache of the combined domains returned by getDomain(); only allocated when a taxonomy is present. */
00044 protected HashMap<String, MultiIterator<String>> multiDomains;
00045
/** Whether debug messages (e.g. Prolog assertions) are printed. */
00046 protected boolean debug = false;
/** Whether progress messages are printed while reading databases. */
00047 protected boolean verbose = false;
/** Exposes the setters of this object as named parameters. */
00048 protected ParameterHandler paramHandler;
00049
/**
 * Constructs an empty database for the given model.
 * The domains are pre-filled with the model's guaranteed domain elements,
 * and a Prolog knowledge base is built if the model defines Prolog rules.
 *
 * @param model the relational model this database belongs to
 * @throws Exception if a guaranteed domain element cannot be added or a
 *         Prolog rule cannot be told to the knowledge base
 */
public Database(RelationalModel model) throws Exception {
    this.model = model;
    entries = new HashMap<String, Variable>();
    domains = new HashMap<String, HashSet<String>>();
    functionalDependencies = new HashMap<RelationKey, HashMap<String, String[]>>();

    paramHandler = new ParameterHandler(this);
    paramHandler.add("debug", "setDebug");
    // previously registered under "debug" a second time, which left the
    // verbose setter without a parameter name of its own
    paramHandler.add("verbose", "setVerbose");

    // fill domains with the elements the model guarantees to exist
    // NOTE(review): the taxonomy is still unset at this point, so these
    // elements bypass the taxonomy bookkeeping in fillDomain - confirm
    // that this is intended
    for(Entry<String, String[]> e : model.getGuaranteedDomainElements().entrySet()) {
        for(String element : e.getValue())
            fillDomain(e.getKey(), element);
    }

    // build the Prolog knowledge base from the model's rules (if any)
    Collection<String> prologRules = model.getPrologRules();
    if(prologRules != null && !prologRules.isEmpty()) {
        System.out.println(" building prolog knowledge base... ");
        prolog = new PrologKnowledgeBase();
        for(String rule : prologRules) {
            prolog.tell(rule);
        }
    }

    // taxonomy-related bookkeeping is only needed if there is a taxonomy
    taxonomy = model.getTaxonomy();
    if(taxonomy != null) {
        entity2type = new HashMap<String, String>();
        multiDomains = new HashMap<String, MultiIterator<String>>();
    }
}
00091
00092 public void setDebug(boolean debug) {
00093 this.debug = debug;
00094 }
00095
00112 public String getVariableValue(String varName, boolean closedWorld) throws Exception {
00113 String lowerCaseName = varName.toLowerCase();
00114 Variable var = this.entries.get(lowerCaseName);
00115
00116 if(var != null)
00117 return var.value;
00118
00119
00120 int braceIndex = varName.indexOf('(');
00121 String nodeName = varName.substring(0, braceIndex);
00122 Signature sig = model.getSignature(nodeName);
00123
00124
00125 if(sig.isLogical) {
00126 String[] args = varName.substring(braceIndex+1, varName.length()-1).split("\\s*,\\s*");
00127 return getPrologValue(sig, args) ? "True" : "False";
00128 }
00129
00130
00131
00132
00133 if(closedWorld) {
00134 if(sig.isBoolean())
00135 return "False";
00136 else {
00137 throw new Exception("Missing database value of " + varName + " - cannot apply closed-world assumption because domain is not boolean: " + sig.returnType);
00138 }
00139 }
00140
00141 return null;
00142 }
00143
00153 public Variable getVariable(String varName) {
00154 return entries.get(varName.toLowerCase());
00155 }
00156
00160 public boolean contains(String varName) {
00161 if(entries.containsKey(varName.toLowerCase()))
00162 return true;
00163
00164
00165
00166 String functionName = varName.substring(0, varName.indexOf('('));
00167 Signature sig = model.getSignature(functionName);
00168 return sig.isLogical;
00169 }
00170
00174 public boolean addVariable(Variable var) throws Exception {
00175 return addVariable(var, false, true);
00176 }
00177
00178 protected boolean addVariable(Variable var, boolean ignoreUndefinedFunctions, boolean doPrologAssertions) throws Exception {
00179 boolean ret = false;
00180 String entryKey = var.getKeyString().toLowerCase();
00181 if(entries.containsKey(entryKey))
00182 return ret;
00183
00184
00185
00186
00187 Signature sig = model.getSignature(var.functionName);
00188 if(sig == null) {
00189
00190 if(ignoreUndefinedFunctions)
00191 return ret;
00192 else
00193 throw new Exception(String.format("Function %s appears in the data but is not declared in the model.", var.functionName));
00194 }
00195
00196 if(sig.isLogical && doPrologAssertions) {
00197 if(var.isTrue()) {
00198 String func = var.functionName;
00199 func = func.substring(0, 1).toLowerCase() + func.substring(1);
00200 String line = func + "(";
00201 for(String par : var.params) {
00202 line += par.substring(0, 1).toLowerCase() + par.substring(1) + ",";
00203 }
00204 line = line.substring(0, line.length() - 1) + ")";
00205 if(debug) System.out.println("Prolog: asserted " + line);
00206 prolog.tell(line + ".");
00207 }
00208 }
00209
00210 if(sig.argTypes.length != var.params.length)
00211 throw new Exception("The database entry '" + var.getKeyString() + "' is not compatible with the signature definition of the corresponding function: expected " + sig.argTypes.length + " parameters as per the signature, got " + var.params.length + ".");
00212
00213
00214 if(!sig.isBoolean())
00215 fillDomain(sig.returnType, var.value);
00216 for(int i = 0; i < sig.argTypes.length; i++) {
00217
00218
00219 fillDomain(sig.argTypes[i], var.params[i]);
00220 }
00221
00222
00223 entries.put(entryKey, var);
00224 ret = true;
00225
00226
00227
00228 Collection<RelationKey> keys = this.model.getRelationKeys(var.functionName);
00229 if(keys != null) {
00230
00231 if(!var.isTrue())
00232 return ret;
00233
00234 for(RelationKey key : keys) {
00235
00236 StringBuffer sb = new StringBuffer();
00237 int i = 0;
00238 for(Integer paramIdx : key.keyIndices) {
00239 if(i++ > 0)
00240 sb.append(',');
00241 sb.append(var.params[paramIdx]);
00242 }
00243
00244 HashMap<String, String[]> hm = functionalDependencies.get(key);
00245 if(hm == null) {
00246 hm = new HashMap<String, String[]>();
00247 functionalDependencies.put(key, hm);
00248 }
00249 hm.put(sb.toString(), var.params);
00250 }
00251 }
00252 return ret;
00253 }
00254
00255 public String[] getParameterSet(RelationKey key, String[] keyValues) {
00256
00257
00258 HashMap<String, String[]> m = functionalDependencies.get(key);
00259 if(m == null)
00260 return null;
00261 return m.get(StringTool.join(",", keyValues));
00262 }
00263
00264 public void readBLOGDB(String databaseFilename) throws Exception {
00265 readBLOGDB(databaseFilename, false);
00266 }
00267
00268 public void readBLOGDB(String databaseFilename, boolean ignoreUndefinedNodes) throws Exception {
00269
00270 if(verbose)
00271 System.out.printf(" reading contents of %s...\n", databaseFilename);
00272 String dbContent = FileUtil.readTextFile(databaseFilename);
00273
00274
00275 if(verbose)
00276 System.out.println(" removing comments");
00277 Pattern comments = Pattern.compile("//.*?$|/\\*.*?\\*/", Pattern.MULTILINE | Pattern.DOTALL);
00278 Matcher matcher = comments.matcher(dbContent);
00279 dbContent = matcher.replaceAll("");
00280
00281
00282 if(verbose)
00283 System.out.println(" reading items");
00284 Pattern re_entry = Pattern.compile("(\\w+)\\(([^\\)]+)\\)\\s*=\\s*([^;]*);?");
00285 Pattern re_domDecl = Pattern.compile("(\\w+)\\s*=\\s*\\{(.*?)\\}");
00286 BufferedReader br = new BufferedReader(new StringReader(dbContent));
00287 String line;
00288 int numVars = 0;
00289 while((line = br.readLine()) != null) {
00290 line = line.trim();
00291
00292 matcher = re_entry.matcher(line);
00293 if(matcher.matches()) {
00294
00295
00296 Variable var = new Variable(matcher.group(1), matcher.group(2).split("\\s*,\\s*"), matcher.group(3), model);
00297
00298 addVariable(var, ignoreUndefinedNodes, true);
00299 if(++numVars % 100 == 0 && verbose)
00300 System.out.print(" " + numVars + " vars read\r");
00301 continue;
00302 }
00303
00304 matcher = re_domDecl.matcher(line);
00305 if(matcher.matches()) {
00306 String domName = matcher.group(1);
00307 String[] constants = matcher.group(2).split("\\s*,\\s*");
00308 for(String c : constants)
00309 fillDomain(domName, c);
00310 continue;
00311 }
00312
00313 if(line.length() != 0) {
00314 throw new Exception("Database entry could not be read: " + line);
00315 }
00316 }
00317 }
00318
00328 protected void fillDomain(String type, String value) throws Exception {
00329
00330
00331
00332
00333 if(taxonomy != null) {
00334 String prevType = entity2type.get(value);
00335 if(prevType != null) {
00336 if(prevType.equals(type))
00337 return;
00338
00339 if(taxonomy.query_isa(type, prevType))
00340 domains.get(prevType).remove(value);
00341
00342 else if(taxonomy.query_isa(prevType, type))
00343 return;
00344 else
00345 ;
00346 }
00347 entity2type.put(value, type);
00348 }
00349
00350 HashSet<String> dom = domains.get(type);
00351 if(dom == null) {
00352 dom = new HashSet<String>();
00353 domains.put(type, dom);
00354 }
00355 if(!dom.contains(value))
00356 dom.add(value);
00357 }
00358
00363 public void checkDomains(boolean verbose) {
00364 ArrayList<HashSet<String>> doms = new ArrayList<HashSet<String>>();
00365 ArrayList<String> domNames = new ArrayList<String>();
00366 for(Entry<String, HashSet<String>> entry : domains.entrySet()) {
00367 doms.add(entry.getValue());
00368 domNames.add(entry.getKey());
00369 }
00370 for(int i = 0; i < doms.size(); i++) {
00371 for(int j = i + 1; j < doms.size(); j++) {
00372
00373 HashSet<String> dom1 = doms.get(i);
00374 HashSet<String> dom2 = doms.get(j);
00375 for(String value : dom1) {
00376 if(dom2.contains(value)) {
00377
00378 if(verbose)
00379 System.out.println("Domains " + domNames.get(i) + " and " + domNames.get(j) + " overlap (both contain " + value + "). Merging...");
00380 String targetDomName = domNames.get(i);
00381 this.model.replaceType(domNames.get(j), targetDomName);
00382
00383 dom1.addAll(dom2);
00384 doms.set(j, dom1);
00385 for(String v : dom2)
00386 entity2type.put(v, targetDomName);
00387 break;
00388 }
00389 }
00390 }
00391 }
00392 }
00393
00400 public Iterable<String> getDomain(String domName) throws Exception {
00401 if(taxonomy == null)
00402 return domains.get(domName);
00403 else {
00404 MultiIterator<String> dom = multiDomains.get(domName);
00405 if(dom != null)
00406 return dom;
00407 dom = new MultiIterator<String>();
00408 boolean isEmpty = true;
00409 for(Concept c : taxonomy.getDescendants(domName)) {
00410 Iterable<String> subdom = domains.get(c.name);
00411 if(subdom != null) {
00412 dom.add(subdom);
00413 isEmpty = false;
00414 }
00415 }
00416 if(isEmpty)
00417 dom = null;
00418 multiDomains.put(domName, dom);
00419 return dom;
00420 }
00421 }
00422
00430 public Collection<Variable> getEntries() throws Exception {
00431
00432
00433 if(prolog != null && !prologDatabaseExtended) {
00434 prologDatabaseExtended = true;
00435 for(Signature sig : this.model.getSignatures()) {
00436 if(sig.isLogical) {
00437 Collection<String[]> bindings = ParameterGrounder.generateGroundings(sig, this);
00438 for(String[] b : bindings)
00439 getPrologValue(sig, b);
00440 }
00441 }
00442 }
00443 return entries.values();
00444 }
00445
00453 protected boolean getPrologValue(Signature sig, String[] args) throws Exception {
00454 String[] prologArgs = new String[args.length];
00455 for(int j = 0; j < args.length; j++)
00456 prologArgs[j] = args[j].substring(0, 1).toLowerCase() + args[j].substring(1);
00457 boolean value = prolog.ask(Signature.formatVarName(sig.functionName, prologArgs));
00458 Variable var = new Variable(sig.functionName, args, value ? "True" : "False", model);
00459 boolean added = addVariable(var, false, false);
00460 if(added && debug)
00461 System.out.println("Prolog: computed " + var);
00462 return value;
00463 }
00464
00471 public String[][] getEntriesAsArray() throws Exception {
00472 Collection<Variable> vars = getEntries();
00473 String[][] ret = new String[entries.size()][2];
00474 int i = 0;
00475 for(Variable var : vars) {
00476 ret[i][0] = var.getKeyString();
00477 ret[i][1] = var.value;
00478 i++;
00479 }
00480 return ret;
00481 }
00482
00490 public void setClosedWorldPred(String predName) throws Exception {
00491 Signature sig = this.model.getSignature(predName);
00492 if(sig == null)
00493 throw new Exception("Cannot determine signature of " + predName);
00494 String[] params = new String[sig.argTypes.length];
00495 setClosedWorldPred(sig, 0, params);
00496 }
00497
00498 protected void setClosedWorldPred(Signature sig, int i, String[] params) throws Exception {
00499 if(i == params.length) {
00500 String varName = Signature.formatVarName(sig.functionName, params);
00501 if(!this.contains(varName)) {
00502 Variable var = new Variable(sig.functionName, params.clone(), "False", model);
00503 this.addVariable(var);
00504 }
00505 return;
00506 }
00507 Iterable<String> dom = this.getDomain(sig.argTypes[i]);
00508 if(dom == null)
00509 return;
00510 for(String value : dom) {
00511 params[i] = value;
00512 setClosedWorldPred(sig, i + 1, params);
00513 }
00514 }
00515
00516 public Signature getSignature(String functionName) {
00517 return model.getSignature(functionName);
00518 }
00519
00520 public void printDomain(PrintStream out) {
00521 for(Entry<String, HashSet<String>> e : domains.entrySet()) {
00522 out.println(e.getKey() + ": " + StringTool.join(", ", e.getValue()));
00523 }
00524 }
00525
00526 public void print() throws Exception {
00527 for(Variable v : getEntries())
00528 System.out.println(v.toString());
00529 }
00530
00536 public void readMLNDB(String databaseFilename) throws Exception {
00537 readMLNDB(databaseFilename, false);
00538 }
00539
00543 public void readMLNDB(String databaseFilename, boolean ignoreUndefinedNodes) throws Exception {
00544 boolean verbose = false;
00545
00546
00547 if(verbose)
00548 System.out.printf("reading contents of %s...\n", databaseFilename);
00549 String dbContent = FileUtil.readTextFile(databaseFilename);
00550
00551
00552
00553 Pattern comments = Pattern.compile("//.*?$|/\\*.*?\\*/", Pattern.MULTILINE | Pattern.DOTALL);
00554 Matcher matcher = comments.matcher(dbContent);
00555 dbContent = matcher.replaceAll("");
00556
00557
00558
00559 String arg = "\\w+";
00560 String argList = "\\s*" + arg + "\\s*(?:,\\s*" + arg + "\\s*)*";
00561 Pattern re_entry = Pattern.compile("(!?\\w+)\\((" + argList + ")\\)");
00562 Pattern re_domDecl = Pattern.compile("(\\w+)\\s*=\\s*\\{(" + argList + ")\\}");
00563 BufferedReader br = new BufferedReader(new StringReader(dbContent));
00564 String line;
00565 Variable var;
00566 while((line = br.readLine()) != null) {
00567 line = line.trim();
00568
00569 matcher = re_entry.matcher(line);
00570 if(matcher.matches()) {
00571 if(matcher.group(1).startsWith("!"))
00572 var = new Variable(matcher.group(1).substring(1), matcher.group(2).trim().split("\\s*,\\s*"), "False", model);
00573 else
00574 var = new Variable(matcher.group(1), matcher.group(2).trim().split("\\s*,\\s*"), "True", model);
00575
00576 addVariable(var, ignoreUndefinedNodes, true);
00577
00578
00579 continue;
00580 }
00581
00582
00583 matcher = re_domDecl.matcher(line);
00584 if(matcher.matches()) {
00585 String domNam = matcher.group(1);
00586 String[] constants = matcher.group(2).trim().split("\\s*,\\s*");
00587 for(String c : constants)
00588 fillDomain(domNam, c);
00589 continue;
00590 }
00591
00592 if(line.length() != 0)
00593 System.err.println("Line could not be read: " + line);
00594 }
00595 }
00596
00601 public HashMap<String, HashSet<String>> getDomains() throws Exception {
00602 if(taxonomy != null)
00603 throw new Exception("Cannot safely return the set of domains for a model that uses a taxonomy");
00604 return domains;
00605 }
00606
00607 public RelationalModel getModel() {
00608 return this.model;
00609 }
00610
00617 public String getConstantType(String constant) {
00618 for(Entry<String, HashSet<String>> e : this.domains.entrySet()) {
00619 if(e.getValue().contains(constant)) {
00620 return e.getKey();
00621 }
00622 }
00623 return null;
00624 }
00625
00626 public static class Variable extends edu.tum.cs.srl.AbstractVariable {
00627
00628 RelationalModel model;
00629
00630 public Variable(String functionName, String[] params, String value, RelationalModel model) {
00631 super(functionName, params, value);
00632 this.model = model;
00633 }
00634
00635 public String getPredicate() {
00636 if(isBoolean())
00637 return functionName + "(" + StringTool.join(",", params) + ")";
00638 else
00639 return functionName + "(" + StringTool.join(",", params) + "," + value + ")";
00640 }
00641
00642 public boolean isBoolean() {
00643 return model.getSignature(functionName).isBoolean();
00644 }
00645
00646 @Override
00647 public String toString() {
00648 return String.format("%s = %s", Signature.formatVarName(functionName, params), value);
00649 }
00650 }
00651
00652 @Override
00653 public ParameterHandler getParameterHandler() {
00654 return paramHandler;
00655 }
00656
00657 public void setVerbose(boolean verbose) {
00658 this.verbose = verbose;
00659 }
00660 }