moveit_benchmark_statistics.py
#!/usr/bin/env python

######################################################################
# Software License Agreement (BSD License)
#
#  Copyright (c) 2010, Rice University
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above
#     copyright notice, this list of conditions and the following
#     disclaimer in the documentation and/or other materials provided
#     with the distribution.
#   * Neither the name of the Rice University nor the names of its
#     contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
#  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
#  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
#  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
#  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
#  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
#  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
######################################################################

# Author: Mark Moll, Ioan Sucan, Dave Coleman

from sys import argv, exit
import os
import sqlite3
import datetime
import matplotlib
matplotlib.use('pdf')
from matplotlib import __version__ as matplotlibversion
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import numpy as np
from optparse import OptionParser, OptionGroup

def read_benchmark_log(dbname, filenames):
    """Parse benchmark log files and store the parsed data in a sqlite3 database."""

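    # Rough sketch of the log layout this parser assumes, inferred from the
    # readline() calls below (the authoritative format is whatever the MoveIt
    # benchmarking code writes):
    #   line 1: ... <experiment name>          (last token)
    #   line 2: ... <hostname>                 (last token)
    #   line 3: <token> <token> <date ...>     (everything after the 2nd token)
    #   line 4: ... <goal name>                (last token)
    #   line 5: <time limit> ...
    #   line 6: <total time> ...
    #   line 7: <number of planners> ...
    #   then, for each planner: its name, a count of common settings lines
    #   followed by those lines, a count of run properties followed by
    #   "<property name> <type>" lines, and a count of runs followed by one
    #   "; "-separated line of values per run.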
    conn = sqlite3.connect(dbname)
    c = conn.cursor()
    c.execute('PRAGMA FOREIGN_KEYS = ON')
    c.execute("""CREATE TABLE IF NOT EXISTS experiments
        (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(512), totaltime REAL, timelimit REAL, hostname VARCHAR(1024), date DATETIME, setup TEXT)""")
    c.execute("""CREATE TABLE IF NOT EXISTS known_planner_configs
        (id INTEGER PRIMARY KEY AUTOINCREMENT, planner_name VARCHAR(512) NOT NULL, settings TEXT)""")
    for filename in filenames:
        print("Processing " + filename)
        logfile = open(filename, 'r')
        expname = logfile.readline().split()[-1]
        hostname = logfile.readline().split()[-1]
        date = " ".join(logfile.readline().split()[2:])
        goal_name = logfile.readline().split()[-1]

        # disabled the planning request part
        #logfile.readline() # skip <<<|
        #expsetup = ""
        #expline = logfile.readline()
        #while not expline.startswith("|>>>"):
        #    expsetup = expsetup + expline
        #    expline = logfile.readline()
        expsetup = ""

        timelimit = float(logfile.readline().split()[0])
        totaltime = float(logfile.readline().split()[0])

        c.execute('INSERT INTO experiments VALUES (?,?,?,?,?,?,?)',
              (None, expname, totaltime, timelimit, hostname, date, expsetup) )
        c.execute('SELECT last_insert_rowid()')
        experiment_id = c.fetchone()[0]
        num_planners = int(logfile.readline().split()[0])

        for i in range(num_planners):
            planner_name = logfile.readline()[:-1]
            print("Parsing data for " + planner_name)

            # read common data for planner
            num_common = int(logfile.readline().split()[0])
            settings = ""
            for j in range(num_common):
                settings = settings + logfile.readline() + ';'

            # find planner id
            c.execute("SELECT id FROM known_planner_configs WHERE (planner_name=? AND settings=?)", (planner_name, settings,))
            p = c.fetchone()
            if p is None:
                c.execute("INSERT INTO known_planner_configs VALUES (?,?,?)", (None, planner_name, settings,))
                c.execute('SELECT last_insert_rowid()')
                planner_id = c.fetchone()[0]
            else:
                planner_id = p[0]

            # read run properties

            # number of properties to read from log file
            num_properties = int(logfile.readline().split()[0])

            # load a dictionary of properties and types
            # we keep the names of the properties in a list as well, to ensure the correct order of properties
            properties = {}
            basePropNames = ['experimentid', 'plannerid', 'goal_name']  # these are the ones not from the planner directly
            basePropValues = [experiment_id, planner_id, goal_name]
            propNames = []
            propNames.extend(basePropNames)
            for j in range(num_properties):
                field = logfile.readline().split()
                ftype = field[-1]
                fname = "_".join(field[:-1])
                properties[fname] = ftype
                propNames.append(fname)

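            # For illustration only (the actual names depend on the log being
            # parsed): a property line such as "solution length REAL" becomes a
            # column named "solution_length" with SQL type "REAL".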
            # create the table, if needed
            table_columns = "experimentid INTEGER, plannerid INTEGER, goal_name VARCHAR(100)"
            for k, v in properties.items():
                table_columns = table_columns + ', ' + k + ' ' + v
            table_columns = table_columns + ", FOREIGN KEY(experimentid) REFERENCES experiments(id) ON DELETE CASCADE ON UPDATE CASCADE"
            table_columns = table_columns + ", FOREIGN KEY(plannerid) REFERENCES known_planner_configs(id) ON DELETE CASCADE ON UPDATE CASCADE"

            planner_table = 'planner_%s' % planner_name
            c.execute("CREATE TABLE IF NOT EXISTS `%s` (%s)" % (planner_table, table_columns))

            # check if the table has all the needed columns; if not, add them
            c.execute('SELECT * FROM `%s`' % planner_table)
            added_columns = [ t[0] for t in c.description]
            for col in properties.keys():
                if not col in added_columns:
                    c.execute('ALTER TABLE `' + planner_table + '` ADD ' + col + ' ' + properties[col] + ';')

            # add measurements
            insert_fmt_str = 'INSERT INTO `' + planner_table + '` (' + ','.join(propNames) + ') VALUES (' + ','.join('?'*(num_properties + len(basePropNames))) + ')'

            num_runs = int(logfile.readline().split()[0])
            for j in range(num_runs):
                run = tuple(basePropValues + [None if len(x)==0 else float(x)
                    for x in logfile.readline().split('; ')[:-1]])
                c.execute(insert_fmt_str, run)

            logfile.readline()
        logfile.close()
    conn.commit()
    c.close()

def plot_attribute(cur, planners, attribute, typename):
    """Create a box plot for a particular attribute. It will include data for
    all planners that have data for this attribute."""
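    # Boolean attributes (whose distinct values are a subset of {0, 1}) are
    # rendered as a bar chart of the percentage of true values per planner;
    # numeric attributes are rendered as box plots. If any planner has NULL
    # (missing) measurements, their count is printed above its bar or box.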
    plt.clf()
    ax = plt.gca()
    labels = []
    measurements = []
    nan_counts = []
    is_bool = True
    for planner in planners:
        cur.execute('SELECT * FROM `%s`' % planner)
        attributes = [ t[0] for t in cur.description]
        if attribute in attributes:
            cur.execute('SELECT `%s` FROM `%s` WHERE `%s` IS NOT NULL' % (attribute, planner, attribute))
            measurement = [ t[0] for t in cur.fetchall() ]
            cur.execute('SELECT count(*) FROM `%s` WHERE `%s` IS NULL' % (planner, attribute))
            nan_counts.append(cur.fetchone()[0])
            cur.execute('SELECT DISTINCT `%s` FROM `%s`' % (attribute, planner))
            is_bool = is_bool and set([t[0] for t in cur.fetchall() if t[0] is not None]).issubset(set([0,1]))
            measurements.append(measurement)
            labels.append(planner.replace('planner_geometric_','').replace('planner_control_',''))
    if is_bool:
        width = .5
        measurements_percentage = [sum(m)*100./len(m) for m in measurements]
        ind = range(len(measurements))
        plt.bar(ind, measurements_percentage, width)
        xtickNames = plt.xticks([x+width/2. for x in ind], labels, rotation=30)
        ax.set_ylabel(attribute.replace('_',' ') + ' (%)')
    else:
        if int(matplotlibversion.split('.')[0])<1:
            plt.boxplot(measurements, notch=0, sym='k+', vert=1, whis=1.5)
        else:
            plt.boxplot(measurements, notch=0, sym='k+', vert=1, whis=1.5, bootstrap=1000)
        ax.set_ylabel(attribute.replace('_',' '))
        xtickNames = plt.setp(ax, xticklabels=labels)
        plt.setp(xtickNames, rotation=25)
    ax.set_xlabel('Motion planning algorithm')
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
    if max(nan_counts)>0:
        maxy = max([max(y) for y in measurements])
        for i in range(len(labels)):
            x = i+width/2 if is_bool else i+1
            ax.text(x, .95*maxy, str(nan_counts[i]), horizontalalignment='center', size='small')
    plt.show()

def plot_statistics(dbname, fname):
    """Create a PDF file with box plots for all attributes."""
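    # Typically reached through the command-line options below; running the
    # script with "-p plots.pdf" and the default database name is equivalent to
    #   plot_statistics('benchmark.db', 'plots.pdf')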
    print("Generating plot...")
    conn = sqlite3.connect(dbname)
    c = conn.cursor()
    c.execute('PRAGMA FOREIGN_KEYS = ON')
    c.execute("SELECT name FROM sqlite_master WHERE type='table'")
    table_names = [ str(t[0]) for t in c.fetchall() ]
    planner_names = [ t for t in table_names if t.startswith('planner_') ]
    attributes = []
    types = {}
    experiments = []
    # merge possible attributes from all planners
    for p in planner_names:
        c.execute('SELECT * FROM `%s` LIMIT 1' % p)
        atr = [ t[0] for t in c.description]
        atr.remove('plannerid')
        atr.remove('experimentid')
        for a in atr:
            if a not in attributes:
                c.execute('SELECT typeof(`%s`) FROM `%s` WHERE `%s` IS NOT NULL LIMIT 1' % (a, p, a))
                attributes.append(a)
                types[a] = c.fetchone()[0]
        c.execute('SELECT DISTINCT experimentid FROM `%s`' % p)
        eid = [t[0] for t in c.fetchall() if t[0] is not None]
        for e in eid:
            if e not in experiments:
                experiments.append(e)
    attributes.sort()

    pp = PdfPages(fname)
    for atr in attributes:
        if types[atr]=='integer' or types[atr]=='real':
            plot_attribute(c, planner_names, atr, types[atr])
            pp.savefig(plt.gcf())
    plt.clf()
    pagey = 0.9
    pagex = 0.06
    for e in experiments:
        # get the number of runs, per planner, for this experiment
        runcount = []
        for p in planner_names:
            c.execute('SELECT count(*) FROM `%s` WHERE experimentid = %s' % (p, e))
            runcount.append(c.fetchone()[0])

        # check if this number is the same for all planners
        runs = "Number of averaged runs: "
        if len([r for r in runcount if not r == runcount[0]]) > 0:
            runs = runs + ", ".join([planner_names[i].replace('planner_geometric_','').replace('planner_control_','') +
                         "=" + str(runcount[i]) for i in range(len(runcount))])
        else:
            runs = runs + str(runcount[0])

        c.execute('SELECT name, timelimit FROM experiments WHERE id = %s' % e)
        d = c.fetchone()
        plt.figtext(pagex, pagey, "Experiment '%s'" % d[0])
        plt.figtext(pagex, pagey-0.05, runs)
        plt.figtext(pagex, pagey-0.10, "Time limit per run: %s seconds" % d[1])
        pagey -= 0.22
    plt.show()
    pp.savefig(plt.gcf())
    pp.close()

def save_as_mysql(dbname, mysqldump):
    """Convert the SQLite3 benchmark database into a MySQL dump file."""
    # See http://stackoverflow.com/questions/1067060/perl-to-python
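    # The dump written below targets MySQL (InnoDB engine, AUTO_INCREMENT).
    # As an illustration, such a file can typically be imported with
    #   mysql -u <user> -p <database> < <dumpfile>
    # (the exact invocation depends on the local MySQL setup).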
    import re
    print("Saving as MySQL dump file...")

    conn = sqlite3.connect(dbname)
    mysqldump = open(mysqldump, 'w')

    # make sure all tables are dropped in an order that keeps foreign keys valid
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table'")
    table_names = [ str(t[0]) for t in c.fetchall() ]
    c.close()
    last = ['experiments', 'known_planner_configs']
    for table in table_names:
        if table.startswith("sqlite"):
            continue
        if not table in last:
            mysqldump.write("DROP TABLE IF EXISTS `%s`;\n" % table)
    for table in last:
        if table in table_names:
            mysqldump.write("DROP TABLE IF EXISTS `%s`;\n" % table)

    for line in conn.iterdump():
        process = False
        for nope in ('BEGIN TRANSACTION','COMMIT',
            'sqlite_sequence','CREATE UNIQUE INDEX', 'CREATE VIEW'):
            if nope in line: break
        else:
            process = True
        if not process: continue
        line = re.sub(r"[\n\r\t ]+", " ", line)
        m = re.search('CREATE TABLE ([a-zA-Z0-9_]*)(.*)', line)
        if m:
            name, sub = m.groups()
            sub = sub.replace('"','`')
            line = '''CREATE TABLE IF NOT EXISTS %(name)s%(sub)s'''
            line = line % dict(name=name, sub=sub)
            # make sure we use an engine that supports foreign keys
            line = line.rstrip("\n\t ;") + " ENGINE = InnoDB;\n"
        else:
            m = re.search('INSERT INTO "([a-zA-Z0-9_]*)"(.*)', line)
            if m:
                line = 'INSERT INTO %s%s\n' % m.groups()
                line = line.replace('"', r'\"')
                line = line.replace('"', "'")

        line = re.sub(r"([^'])'t'(.)", "\\1THIS_IS_TRUE\\2", line)
        line = line.replace('THIS_IS_TRUE', '1')
        line = re.sub(r"([^'])'f'(.)", "\\1THIS_IS_FALSE\\2", line)
        line = line.replace('THIS_IS_FALSE', '0')
        line = line.replace('AUTOINCREMENT', 'AUTO_INCREMENT')
        mysqldump.write(line)
    mysqldump.close()

def generate_csv(dbname, fname):
    """Create a CSV file with all experiments combined into one list."""
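    # The output contains one header row followed by one row per benchmark run,
    # for example (attribute names here are purely illustrative):
    #   planner_type, time, solved, ...
    #   RRTConnect, 0.12, 1.0, ...
    # Columns a planner never measured are left blank in its rows.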
    print("Generating CSV output...")

    # Open CSV File
    csv = open(fname, 'w')

    # Connect to database
    conn = sqlite3.connect(dbname)
    cursor = conn.cursor()
    cursor.execute('PRAGMA FOREIGN_KEYS = ON')

    # Get planner tables
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    table_names = [ str(t[0]) for t in cursor.fetchall() ]
    planner_names = [ t for t in table_names if t.startswith('planner_') ]

    # Create vectors
    attributes = []
    types = {}
    experiments = []

    # merge possible attributes from all planners
    for planner_name in planner_names:
        cursor.execute('SELECT * FROM `%s` LIMIT 1' % planner_name)
        atr = [ t[0] for t in cursor.description]
        atr.remove('plannerid')
        atr.remove('experimentid')
        for attribute in atr:
            if attribute not in attributes:
                cursor.execute('SELECT typeof(`%s`) FROM `%s` WHERE `%s` IS NOT NULL LIMIT 1' % (attribute, planner_name, attribute))
                attributes.append(attribute) # add this new attribute (first time seen)
                types[attribute] = cursor.fetchone()[0]

        # Find new experiments for this planner table and add them to our experiment vector
        cursor.execute('SELECT DISTINCT experimentid FROM `%s`' % planner_name)
        experiment_ids = [t[0] for t in cursor.fetchall() if t[0] is not None]
        for experiment_id in experiment_ids:
            if experiment_id not in experiments:
                experiments.append(experiment_id)

    # Sort all found attributes
    attributes.sort(reverse=True)

    # Create header of the CSV
    csv.write('planner_type')
    for atr in attributes:
        #if types[atr]=='integer' or types[atr]=='real':
        csv.write(", %s"%atr)
    csv.write('\n') # new line

    # Start creating the CSV file by polling each planner table and separating its data into the
    # proper column format, leaving blanks where a planner is missing possible attribute data
    for planner_name in planner_names:
        cursor.execute('SELECT * FROM `%s`' % planner_name)
        # get this planner's attributes
        planner_attributes = [ t[0] for t in cursor.description]
        #print>>csv, planner_attributes

        # loop through each row of the planner experiments, aka each 'run'
        for run in cursor.fetchall():
            # write a *simplified* planner name
            name_short = planner_name.strip('planner')
            name_short = name_short.strip('_OMPL_')
            name_short = name_short.replace('[','_')
            name_short = name_short.strip('kConfigDefault]')
            csv.write(name_short)
            # loop through each global attribute
            for atr in attributes:
                # find the global attribute in this table if it exists
                if atr in planner_attributes:
                    # output value
                    index_of_attr = planner_attributes.index(atr)
                    csv.write(", %s" %run[index_of_attr])
                else:
                    csv.write(", ")
            # done with this line
            csv.write("\n")
    csv.close()


if __name__ == "__main__":
    usage = """%prog [options] [<benchmark.log> ...]"""
    parser = OptionParser(usage)
    parser.add_option("-d", "--database", dest="dbname", default="benchmark.db",
        help="Filename of benchmark database [default: %default]")
    parser.add_option("-v", "--view", action="store_true", dest="view", default=False,
        help="Compute the views for best planner configurations")
    parser.add_option("-p", "--plot", dest="plot", default=None,
        help="Create a PDF of plots")
    parser.add_option("-c", "--csv", dest="csv", default=None,
        help="Create a CSV of combined experiments")
    parser.add_option("-m", "--mysql", dest="mysqldb", default=None,
        help="Save SQLite3 database as a MySQL dump file")
    parser.add_option("-o", "--overwrite", action="store_true", dest="overwrite", default=False,
        help="Use this flag to enable overwriting a previous database file with new benchmarks")

    if len(argv) == 1:
        parser.print_help()

    (options, args) = parser.parse_args()

    if len(args) > 0:
        # Check if user wants to start a new database (delete old one)
        if options.overwrite:
            try:
                os.remove(options.dbname)
            except OSError:
                pass
        read_benchmark_log(options.dbname, args)

    if options.plot:
        plot_statistics(options.dbname, options.plot)

    if options.csv:
        generate_csv(options.dbname, options.csv)

    if options.mysqldb:
        save_as_mysql(options.dbname, options.mysqldb)

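# Example invocations, illustrative only (file names are placeholders):
#
#   moveit_benchmark_statistics.py run1.log run2.log
#       parse the given benchmark logs into the default database benchmark.db
#
#   moveit_benchmark_statistics.py -d results.db -p plots.pdf
#       generate a PDF of box plots from an existing database
#
#   moveit_benchmark_statistics.py -c results.csv -m dump.sql
#       export the combined experiments as CSV and as a MySQL dump file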


benchmarks
Author(s): Ioan Sucan
autogenerated on Mon Oct 6 2014 02:32:28