eval.py
#!/usr/bin/env python

# Evaluate results from a run and compute tables, e.g. makespans, runtimes...
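# Example invocation (illustrative; the file names are placeholders):
#   ./eval.py --eval-dir results/ --ref-data ipc2008-results.dat --check-plans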

from __future__ import with_statement

from optparse import OptionParser
import os
import subprocess
import re

import data_tools
from plan_file_parser import parsePlan
from plan_file_parser import makespanFromPlan

class Problem(object):
    """ An evaluated problem. """
    def __init__(self, path, name):
        """ Default init using a problem name, e.g. 02, and a path that
            contains a plan...best file and a times file for that problem. """
        self.name = name
        # defaults, so that problems built via the static constructors
        # below always have all attributes set
        self.planfiles = []
        self.timesfile = None
        self.makespan = -1
        self.runtime = -1
        if path and self.name:
            # a valid problem has a times file + plan.best - if neither: no data; if no plan: is there a times file?
            self.planfiles.append(os.path.join(path, "plan.p" + self.name + ".pddl.best"))
            self.timesfile = os.path.join(path, "times.p" + self.name + ".pddl")
            self.makespan, self.runtime = self.parseTimes()
    @staticmethod
    def fromDirectory(path, name):
        """ Constructor for a directory at "path/name/" that contains
            multiple plan.soln.### files. """
        p = Problem(None, None)
        p.name = name
        data_dir = os.path.join(path, name)
        assert os.path.isdir(data_dir)
        reg = r"^plan\.soln\.[0-9]+$"
        plan_files = [x for x in os.listdir(data_dir) if re.match(reg, x)]
        # sort by integer number to put, e.g., 10 behind 1, 2, ..., 9
        def plan_nr(planfile):
            assert planfile.startswith("plan.soln.")
            return int(planfile[len("plan.soln."):])
        plan_files.sort(key=plan_nr)
        p.planfiles = [os.path.join(path, name, plan) for plan in plan_files]
        # plan files are in numbered order, so the last plan is the best one
        if p.planfiles:
            p.makespan = makespanFromPlan(parsePlan(p.planfiles[-1]))
        p.runtime = -1
        return p
    @staticmethod
    def fromData(name, makespan):
        """ Constructor from raw data without file reference. """
        p = Problem(None, None)
        p.name = name
        p.makespan = makespan
        p.runtime = -1
        return p
    def dump(self):
        print "Name:", self.name, "Makespan:", self.makespan, "Runtime:", self.runtime
        print "Plans: %s, Times: %s" % (self.planfiles, self.timesfile)
    def __repr__(self):
        return self.name
    def write(self, stream):
        """ Write out name + makespan + runtime. """
        print >> stream, self.name, self.makespan, self.runtime
    def parseTimes(self):
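        """ Parse (makespan, runtime) from the last data line of the times file.

            Assumed format (hypothetical example): '#' starts a comment and each
            data line holds one "<makespan> <runtime>" pair, e.g.:
                # makespan runtime
                982.0 0.44
        """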
        with open(self.timesfile) as f:
            lastLine = ""
            for line in f:
                if not line.strip() or line.startswith("#"):    # skip blank and comment lines
                    continue
                lastLine = line
        if not lastLine:    # no data written -> no results/no plan
            return (-1, -1)
        entries = lastLine.split()
        assert len(entries) == 2
        return (float(entries[0]), float(entries[1]))
    def hasSameFirstPlan(self, other):
        """ Compare my first plan with the first plan of other. """
        if not self.planfiles or not other.planfiles:
            return False
        retcode = subprocess.call(["diff", "-q", self.planfiles[0], other.planfiles[0]], stdout=subprocess.PIPE)
        return retcode == 0

def readRefDataFromFile(ref_file):
    """ Read reference data from a file and return a problem dict
        containing {domain: problem_list}.
        Format of a line in the ipc2008 data:
        tempo-sat  temporal-fast-downward  transport-numeric  12        OK       982              433                0.440936863544
        track      planner                 domain             problem#  solved?  planner-quality  reference-quality  score=reference/planner-quality
        We are interested in the tempo-sat track, any planner (don't care), any domain,
        any problem -> read the reference-quality.
    """
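    # For the example line above, the returned dict would contain (sketch):
    #   {"transport-numeric": [Problem("12", makespan=433.0), ...]}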
    theDict = {}
    with open(ref_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if not line.startswith("tempo-sat"):
                continue
            entries = line.split()
            assert len(entries) == 8, "Wrong number of entries in ref data line"
            domain = entries[2]
            problemNr = int(entries[3])
            problem = "%02d" % problemNr
            ref_quality_str = entries[6]
            ref_quality = None
            if ref_quality_str != "n/a":
                ref_quality = float(ref_quality_str)
            if not ref_quality:
                continue
            p = Problem.fromData(problem, ref_quality)
            if domain not in theDict:
                theDict[domain] = []
            matches = [x for x in theDict[domain] if repr(p) == repr(x)]
            if matches:
                # duplicate entry: the reference quality must agree
                for i in matches:
                    assert i.makespan == ref_quality
            else:
                theDict[domain].append(p)
    for key, val in theDict.iteritems():
        val.sort(key=lambda x: repr(x))
    return theDict

def evalDir(path, files, files_are_condensed):
    """ Evaluate the directory in path that contains a number of
        plan...pddl.best files.
        Returns a nested dict following the directory structure of path
        that maps the last directory to a list of Problems.

        If files_are_condensed is False, the files list is a list of
        directories that each contain one problem as plan.soln.XXX files.
    """
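    # Expected layouts (illustrative):
    #   condensed:         path/plan.p03.pddl.best  +  path/times.p03.pddl
    #   one dir/problem:   path/03/plan.soln.1, path/03/plan.soln.2, ...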
    # First create a list of problems
    problems = []
    if files_are_condensed:
        # The expected format for a problem named X with problem file X.pddl is:
        # plan.pX.pddl.best and times.pX.pddl
        plannames = []
        for f in files:
            if f.endswith(".pddl.best"):
                # plan.p03.pddl.best -> 03
                assert f.startswith("plan.p")
                assert len(f) > len("plan.p") + len(".pddl.best")
                name = f[len("plan.p") : -len(".pddl.best")]
                plannames.append(name)
        plannames.sort(key=str.lower)
        # valid names are those where a times.pX.pddl file exists
        validnames = [name for name in plannames if os.path.exists(os.path.join(path, "times.p" + name + ".pddl"))]
        problems.extend([Problem(path, name) for name in validnames])
    else:
        for i in files:
            p = Problem.fromDirectory(path, i)
            problems.append(p)

    # create return dictionary from path A/B/C/files -> {A: {B: {C: problems} } }
    theDict = {}
    dirList = data_tools.make_dir_list(path)

    # seed dict with the lowest level containing the problems
    assert len(dirList) > 0
    theDict[dirList[-1]] = problems
    dirList = dirList[:-1]

    # build the dict by pushing the current dict as the value of the last entry in dirList
    while len(dirList) > 0:
        newDict = {}
        newDict[dirList[-1]] = theDict
        theDict = newDict
        dirList = dirList[:-1]

    return theDict

def parseResults(eval_dir):
    """ Parse dir and all subdirectories to create results.
        Returns a dictionary of the directory structure relative
        to eval_dir with the evaluated problems. """
    # List directories; a problem/domain directory contains one or more files with "pddl"

    allResults = {}
    for root, dirs, files in os.walk(eval_dir):
        # If files contains a file with "plan...pddl" in it, it is a result dir
        pddls = [f for f in files if f.lower().find("pddl") >= 0 and f.lower().find("plan.") >= 0]
        if pddls:
            dirEval = evalDir(root, files, True)
            allResults = data_tools.merge(allResults, dirEval)

        reg = "^[0-9]+$"    # directories named by numbers only
        probs = [f for f in dirs if re.match(reg, f)]
        probs.sort()
        if probs:
            dirEval = evalDir(root, probs, False)
            allResults = data_tools.merge(allResults, dirEval)

    return allResults

def writeEvalData(evaldict, path):
    """ Write eval.dat with the evaluation of all problems
        in each domain directory. """
    for key, val in evaldict.iteritems():
        localpath = os.path.join(path, key)
        if data_tools.behaves_like_dict(val):   # recurse
            writeEvalData(val, localpath)
        else:   # list of problems
            with open(os.path.join(localpath, "eval.dat"), "w") as f:
                for i in val:
                    i.write(f)

def buildNameIdentifierDict(evaldict, name_entries_dict, curIdent):
    """ From a dictionary like {"K": {"A": {"X": 1, "Y": 2}, "B": {"X": 1, "Z": 3}}}
        build the dictionary {"X": {"/K/A": 1, "/K/B": 1}, "Y": {"/K/A": 2}, "Z": {"/K/B": 3}}. """
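    # Illustrative use (identifiers get a leading "/" since curIdent starts empty):
    #   d = {}
    #   buildNameIdentifierDict({"K": {"A": {"X": 1}}}, d, "")
    #   # d == {"X": {"/K/A": 1}}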
    for key, val in evaldict.iteritems():
        if data_tools.behaves_like_dict(val):   # recurse
            buildNameIdentifierDict(val, name_entries_dict, curIdent + "/" + key)
        else:           # found the final dict that contains name -> problems
            if key not in name_entries_dict:    # init
                name_entries_dict[key] = {}
            name_entries_dict[key][curIdent] = val

def writeTex(evaldict, filename, refDict):
    """ Write a latex file for this dict.
        For the dict {X: {A: problems, B: problems}, Y: {A: problems, C: problems}}
        the output is one table each for A, B and C, where A has the columns X/Y,
        B only the column X, and C only the column Y.
    """
    # First build the name -> {identifier -> entries} dict.
    nameEntriesDict = {}
    buildNameIdentifierDict(evaldict, nameEntriesDict, "")

    refEntriesDict = None
    if refDict:
        refEntriesDict = {}
        buildNameIdentifierDict(refDict, refEntriesDict, "")

    f = open(filename, "w")
    print >> f, '\\documentclass{article}'
    print >> f, "\\usepackage{caption}"
    print >> f, '\\begin{document}'

    if not refEntriesDict:      # absolute values: smaller makespan/runtime is better
        writeTexTable(nameEntriesDict, f, "makespan", "<", refEntriesDict)
        writeTexTable(nameEntriesDict, f, "runtime", "<", refEntriesDict)
    else:                       # relative values (reference/planner): larger is better
        writeTexTable(nameEntriesDict, f, "makespan", ">", refEntriesDict)
        writeTexTable(nameEntriesDict, f, "runtime", ">", refEntriesDict)

    print >> f, '\\end{document}'
    f.flush()
    f.close()

def evaluateProblem(problem, referenceProblem, target):
    """ Evaluate problem's target property.
        If a referenceProblem is given, the property is evaluated relative to
        that (reference/problem). In that case problem might be None if there
        was no data, and None is returned. """
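    # Worked example from the ipc2008 reference data above: for target="makespan",
    # a planner makespan of 982 with a reference makespan of 433 scores 433/982 ~ 0.44.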

    if problem is None:
        return None

    targetVal = getattr(problem, target, None)
    if targetVal is None:
        return None

    if referenceProblem:
        try:
            return getattr(referenceProblem, target) / targetVal     # TODO generalize?
        except ZeroDivisionError:       # target value was incredibly small -> return a huge score for debugging
            return 999999.99
    else:
        return targetVal

def writeTexTableEntry(f, problem, referenceProblem, target, best, num, sums):
    """ Write one entry of an output table referring to the target property of problem.
        If referenceProblem is given, the problem is compared to the referenceProblem
        and relative values are printed.
        Comparison is done with respect to best.
        In that case problem might be None if there was no result/data.
        sums is a dictionary of num -> accumulated sum that is updated here. """

    # write the actual entry for this run
    probVal = evaluateProblem(problem, referenceProblem, target)
    if probVal:
        isBest = (best is not None and best == probVal)
        print >> f, "{\\tiny ", "%.0f" % getattr(problem, target), "} ",
        if isBest:   # this is the best entry -> set it in bold
            print >> f, "\\textbf{",
        print >> f, "%.2f" % probVal,
        if isBest:
            print >> f, "}",
            sums[num] += probVal   # only the best entry counts towards the score
    else:
        print >> f, "-",

def writeTexTableLine(f, problem_id, runs, refVals, target, better, numEntries, sums):
    """ Write one line for problem_id in the output table referring to the target property of the problem.
        If refVals is given, the problem is compared to the reference
        and relative values are printed.
        Comparison is done using the better relation on the quotient of the properties.
        In that case runs might not contain a problem for problem_id if there was no result/data.
        numEntries is only used to decide when the line ends.
        sums is a dictionary of run_num -> accumulated sum. """

    print >> f, "   ", problem_id, "    &",

    # First find the referenceProblem for problem_id
    refProb = None
    if refVals:
        refProbSearch = [j for j in refVals if repr(j) == problem_id]
        assert refProbSearch
        refProb = refProbSearch[0]

    def findProblem(probs):
        """ Return the problem for problem_id in probs or None. """
        matching = [j for j in probs if repr(j) == problem_id]
        return matching[0] if matching else None

    # find the best entry in this line
    best = None
    for num, ident, probs in runs:
        probVal = evaluateProblem(findProblem(probs), refProb, target)
        if probVal is None:
            continue
        if best is None:
            best = probVal
        elif (better == "<" and probVal < best) or (better == ">" and probVal > best):
            best = probVal

    # write the actual entry for each run
    for num, ident, probs in runs:
        writeTexTableEntry(f, findProblem(probs), refProb, target, best, num, sums)
        if num < numEntries - 1:
            print >> f, "&",
        else:
            print >> f, "",
    print >> f, "\\\\"

def writeTexTable(nameEntriesDict, f, target, better, refEntriesDict):
    """ Write a latex table for this dict.
        Creates one table per domain that has one row per problem
        and one column per setting/version.
        target gives the property of a problem to write in a column.
        When comparing the target values with better, an entry that is
        equal to the best one is marked bold.
    """
    for domain, val in nameEntriesDict.iteritems():    # domain -> {Run -> problems}
        refVals = None
        if refEntriesDict:
            try:
                refVals = refEntriesDict[domain]
                # we don't care about the ident for ref data, so just descend
                # into the dict until we reach the problem list
                while data_tools.behaves_like_dict(refVals):
                    assert len(refVals) == 1        # there should be only one ref data set per domain
                    refVals = refVals.values()[0]
                # print "REF vals for domain", domain, "is", refVals
            except KeyError:
                print "WARNING: No reference data for domain", domain, "- skipping domain!"
                continue

        # runs contains: (unique number, descriptor ident, problem list)
        runs = [ (num, ident, probs) for num, (ident, probs) in enumerate(val.iteritems()) ]

        # Write the table header
        print "Writing table for", domain
        print >> f, '\\begin{table}'
        print >> f, '  \\centering'
        print >> f, '  \\begin{tabular}{|l',
        for num, ident, probs in runs:
            print >> f, "|c",
        print >> f, "|}"
        print >> f, '  \\hline'

        print >> f, '  Problem & ',
        for num, ident, probs in runs:
            print >> f, num,
            if num < len(val) - 1:
                print >> f, "&",
            else:
                print >> f, "",
        print >> f, "\\\\"
        print >> f, '  \\hline'

        # First collect all problems (some might not have entries)
        problems = set()
        for num, ident, probs in runs:
            for i in probs:
                problems.add(repr(i))
        probList = list(problems)
        probList.sort(key=str.lower)

        if refVals:
            # verify that all problems have ref data (warn only)
            for i in probList:
                refProbs = [p for p in refVals if repr(p) == i]
                if not refProbs:
                    print "No ref data for domain", domain, " problem:", repr(i), "- skipping"
            # use all ref problems as the problem list, i.e. ignore the original
            # probList (might get some problems without results, which is OK: empty entries)
            probList = [repr(i) for i in refVals]

        sums = dict( [ (num, 0) for num, ident, probs in runs ] )   # run# -> accumulated score
        ref_sum = len(probList) # every ref problem scores 1.0 quality

        # Now for each problem, write a table line
        for i in probList:
            writeTexTableLine(f, i, runs, refVals, target, better, len(val), sums)

        if refVals and target == "makespan":    # the score total only makes sense for makespans
            print >> f, '  \\hline'
            print >> f, "Total &",
            for num, ident, probs in runs:
                print >> f, "%.2f" % sums[num], " / %.2f" % ref_sum,
                if num < len(val) - 1:
                    print >> f, "&",
                else:
                    print >> f, "",
            print >> f, "\\\\"

        print >> f, '  \\hline'
        print >> f, '  \\end{tabular}'
        print >> f, '  \\caption{\\textbf{', target, '} Domain: \\textbf{', domain, '}',
        print >> f, "\\\\"
        # write a descriptor in the caption matching each run# to a description
        for num, ident, probs in runs:
            print >> f, str(num) + ": ", ident.replace("_", "\\_") + ",",
            print >> f, "\\\\"
        print >> f, ' }'
        print >> f, '\\end{table}'
        print >> f, '\\clearpage'
        print >> f, ''

def checkPlans(evaldict, refDict):
    """ Check whether the plans in evaldict are equal to those in refDict. """
    # First build the name -> {identifier -> entries} dict.
    nameEntriesDict = {}
    buildNameIdentifierDict(evaldict, nameEntriesDict, "")

    refEntriesDict = {}
    buildNameIdentifierDict(refDict, refEntriesDict, "")

    for domain, val in nameEntriesDict.iteritems():    # domain -> {Run -> problems}
        print "\nChecking plans match ref data for domain:", domain
        refVals = None
        try:
            refVals = refEntriesDict[domain]
            # we don't care about the ident for ref data, so just descend
            # into the dict until we reach the problem list
            while data_tools.behaves_like_dict(refVals):
                assert len(refVals) == 1        # there should be only one ref data set per domain
                refVals = refVals.values()[0]
            # print "REF vals for domain", domain, "is", refVals
        except KeyError:
            print "WARNING: No reference data for domain", domain, "- skipping domain!"
            continue

        # runs contains: (unique number, descriptor ident, problem list)
        runs = [ (num, ident, probs) for num, (ident, probs) in enumerate(val.iteritems()) ]

        # First collect all problems (some might not have entries)
        problems = set()
        for num, ident, probs in runs:
            for i in probs:
                problems.add(repr(i))
        probList = list(problems)
        probList.sort(key=str.lower)

        # keep only problems that have ref data
        probWithRefList = []
        for i in probList:
            refProbs = [p for p in refVals if repr(p) == i]
            if not refProbs:
                print "No ref data for domain", domain, " problem:", repr(i), "- skipping"
                continue
            assert len(refProbs) == 1
            probWithRefList.append(i)
        probList = probWithRefList

        # Now for each problem, compare the first plan to the ref data
        for i in probList:
            for num, ident, probs in runs:
                myProb = [j for j in probs if repr(j) == i]
                if not myProb:      # this run has no result for the problem
                    continue
                refProb = [j for j in refVals if repr(j) == i]
                samePlan = myProb[0].hasSameFirstPlan(refProb[0])
                if samePlan:
                    print repr(myProb[0]), "OK"
                else:
                    print "Problem:", repr(myProb[0]), "for run", ident,
                    print "plan does NOT MATCH ref data"

def main():
    parser = OptionParser("usage: %prog [options]")
    parser.add_option("-e", "--eval-dir", dest="eval_dir", type="string", action="store")
    parser.add_option("-r", "--ref-data", dest="ref_data", type="string", action="store")
    parser.add_option("-c", "--check-plans", action="store_true", dest="check_plans", default=False)
    opts, args = parser.parse_args()
    print "Eval results dir: %s" % opts.eval_dir
    print "Ref data: %s" % opts.ref_data
    print "Check plans against ref data: %s" % opts.check_plans

    evalDict = parseResults(opts.eval_dir)
    # print "FINAL EVAL DICT: ", evalDict

    ref_data = None
    if opts.ref_data:
        if os.path.isdir(opts.ref_data):
            ref_data = parseResults(opts.ref_data)
        elif os.path.isfile(opts.ref_data):
            ref_data = readRefDataFromFile(opts.ref_data)
        else:
            assert False, "ref data is neither a dir nor a file"
        assert ref_data, "No ref_data read."
        print "Ref-Domains:", ", ".join(ref_data.keys())
        # print "REF DATA DICT: ", ref_data

    # write eval data
    # writeEvalData(evalDict, ".")

    # create latex tables, all grouped by domain name, divided by settings/algos
    writeTex(evalDict, "output.tex", ref_data)

    if ref_data and opts.check_plans:
        checkPlans(evalDict, ref_data)

if __name__ == "__main__":
    main()


