from sys import argv, exit
from os.path import basename, splitext
from math import floor
from optparse import OptionParser, OptionGroup
import sqlite3

import matplotlib
from matplotlib import __version__ as matplotlibversion
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import numpy as np
def readLogValue(filevar, desired_token_index, expected_tokens):
    """Read one whitespace-tokenized line from filevar and return the token at
    desired_token_index, provided every index -> value pair in expected_tokens
    matches the line's tokens.

    On any mismatch (including a line too short to index), the file position
    is restored to where it was before the read and None is returned, so the
    caller can re-parse the same line differently.
    """
    start_pos = filevar.tell()
    tokens = filevar.readline().split()
    for token_index in expected_tokens:
        try:
            if tokens[token_index] != expected_tokens[token_index]:
                # Undo the read so the caller can try a different parse.
                filevar.seek(start_pos)
                return None
        except IndexError:
            # Line has fewer tokens than expected: treat as a mismatch
            # instead of crashing (e.g. on a blank line).
            filevar.seek(start_pos)
            return None
    return tokens[desired_token_index]
def readOptionalLogValue(filevar, desired_token_index, expected_tokens=None):
    """Try to read a token from the current line; return None on mismatch.

    readLogValue restores the file position on failure, so an optional value
    that is absent leaves the stream untouched.  The default for
    expected_tokens is a None sentinel rather than a mutable {} literal.
    """
    if expected_tokens is None:
        expected_tokens = {}
    return readLogValue(filevar, desired_token_index, expected_tokens)
def readRequiredLogValue(name, filevar, desired_token_index, expected_tokens=None):
    """Read a log value that must be present.

    name -- human-readable description used in the error message.
    Raises Exception if the line does not match expected_tokens.
    """
    if expected_tokens is None:
        expected_tokens = {}
    result = readLogValue(filevar, desired_token_index, expected_tokens)
    if result is None:
        raise Exception("Unable to read " + name)
    return result
def ensurePrefix(line, prefix):
    """Assert that line starts with prefix and return the line unchanged.

    Raises Exception when the prefix is missing.  Returning the line lets
    callers chain it, e.g. ensurePrefix(f.readline(), "Starting").split().
    """
    if not line.startswith(prefix):
        raise Exception("Expected prefix " + prefix + " was not found")
    return line
def readOptionalMultilineValue(filevar):
    """Read an optional multiline value delimited by "<<<|" and "|>>>" lines.

    Returns the text between the delimiters (trailing newlines preserved),
    or None — with the file position restored — when no "<<<|" marker is
    found at the current position.  Raises Exception if the opening marker
    is present but the closing "|>>>" is missing.
    """
    start_pos = filevar.tell()
    line = filevar.readline()
    if not line.startswith("<<<|"):
        # No multiline block here; rewind so the caller can re-parse.
        filevar.seek(start_pos)
        return None
    value = ''
    line = filevar.readline()
    while not line.startswith('|>>>'):
        if len(line) == 0:
            # BUG FIX: readline() returns '' at EOF, never None, so the
            # original `line == None` test could not fire and the loop
            # spun forever on a truncated file.
            raise Exception("Expected token |>>> missing")
        value = value + line
        line = filevar.readline()
    return value
def readRequiredMultilineValue(filevar):
    """Read a mandatory multiline value delimited by "<<<|" and "|>>>" lines.

    Raises Exception (via ensurePrefix) when the opening marker is absent,
    and Exception when the closing "|>>>" marker is missing before EOF.
    """
    ensurePrefix(filevar.readline(), "<<<|")
    value = ''
    line = filevar.readline()
    while not line.startswith('|>>>'):
        if len(line) == 0:
            # BUG FIX: readline() returns '' at EOF, never None; without
            # this check a truncated file caused an infinite loop.
            raise Exception("Expected token |>>> missing")
        value = value + line
        line = filevar.readline()
    return value
107 """Parse benchmark log files and store the parsed data in a sqlite3 database.""" 109 conn = sqlite3.connect(dbname)
111 c.execute(
'PRAGMA FOREIGN_KEYS = ON')
114 c.executescript(
"""CREATE TABLE IF NOT EXISTS experiments 115 (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(512), 116 totaltime REAL, timelimit REAL, memorylimit REAL, runcount INTEGER, 117 version VARCHAR(128), hostname VARCHAR(1024), cpuinfo TEXT, 118 date DATETIME, seed INTEGER, setup TEXT); 119 CREATE TABLE IF NOT EXISTS plannerConfigs 120 (id INTEGER PRIMARY KEY AUTOINCREMENT, 121 name VARCHAR(512) NOT NULL, settings TEXT); 122 CREATE TABLE IF NOT EXISTS enums 123 (name VARCHAR(512), value INTEGER, description TEXT, 124 PRIMARY KEY (name, value)); 125 CREATE TABLE IF NOT EXISTS runs 126 (id INTEGER PRIMARY KEY AUTOINCREMENT, experimentid INTEGER, plannerid INTEGER, 127 FOREIGN KEY (experimentid) REFERENCES experiments(id) ON DELETE CASCADE, 128 FOREIGN KEY (plannerid) REFERENCES plannerConfigs(id) ON DELETE CASCADE); 129 CREATE TABLE IF NOT EXISTS progress 130 (runid INTEGER, time REAL, PRIMARY KEY (runid, time), 131 FOREIGN KEY (runid) REFERENCES runs(id) ON DELETE CASCADE)""")
133 for filename
in filenames:
134 print(
'Processing ' + filename)
135 logfile = open(filename,
'r') 136 start_pos = logfile.tell() 140 logfile.seek(start_pos)
145 version =
' '.join([libname, version])
148 date =
' '.join(
ensurePrefix(logfile.readline(),
"Starting").split()[2:])
152 timelimit = float(
readRequiredLogValue(
"time limit", logfile, 0, {-3 :
"seconds", -2 :
"per", -1 :
"run"}))
153 memorylimit = float(
readRequiredLogValue(
"memory limit", logfile, 0, {-3 :
"MB", -2 :
"per", -1 :
"run"}))
156 if nrrunsOrNone !=
None:
157 nrruns = int(nrrunsOrNone)
158 totaltime = float(
readRequiredLogValue(
"total time", logfile, 0, {-3 :
"collect", -2 :
"the", -1 :
"data"}))
161 if numEnumsOrNone !=
None:
162 numEnums = int(numEnumsOrNone)
163 for i
in range(numEnums):
164 enum = logfile.readline()[:-1].split(
'|')
165 c.execute(
'SELECT * FROM enums WHERE name IS "%s"' % enum[0])
166 if c.fetchone() ==
None:
167 for j
in range(len(enum)-1):
168 c.execute(
'INSERT INTO enums VALUES (?,?,?)',
169 (enum[0],j,enum[j+1]))
170 c.execute(
'INSERT INTO experiments VALUES (?,?,?,?,?,?,?,?,?,?,?,?)',
171 (
None, expname, totaltime, timelimit, memorylimit, nrruns,
172 version, hostname, cpuinfo, date, rseed, expsetup) )
173 experimentId = c.lastrowid
175 for i
in range(numPlanners):
176 plannerName = logfile.readline()[:-1]
177 print(
'Parsing data for ' + plannerName)
180 numCommon = int(logfile.readline().split()[0])
182 for j
in range(numCommon):
183 settings = settings + logfile.readline() +
';' 186 c.execute(
'SELECT id FROM plannerConfigs WHERE (name=? AND settings=?)',
187 (plannerName, settings,))
190 c.execute(
'INSERT INTO plannerConfigs VALUES (?,?,?)',
191 (
None, plannerName, settings,))
192 plannerId = c.lastrowid
197 c.execute(
'PRAGMA table_info(runs)')
198 columnNames = [col[1]
for col
in c.fetchall()]
201 numProperties = int(logfile.readline().split()[0])
202 propertyNames = [
'experimentid',
'plannerid']
203 for j
in range(numProperties):
204 field = logfile.readline().split()
205 propertyType = field[-1]
206 propertyName =
'_'.join(field[:-1])
207 if propertyName
not in columnNames:
208 c.execute(
'ALTER TABLE runs ADD %s %s' % (propertyName, propertyType))
209 propertyNames.append(propertyName)
211 insertFmtStr =
'INSERT INTO runs (' +
','.join(propertyNames) + \
212 ') VALUES (' +
','.join(
'?'*len(propertyNames)) +
')' 213 numRuns = int(logfile.readline().split()[0])
215 for j
in range(numRuns):
216 values = tuple([experimentId, plannerId] + \
217 [
None if len(x) == 0
or x ==
'nan' or x ==
'inf' else x
218 for x
in logfile.readline().split(
'; ')[:-1]])
219 c.execute(insertFmtStr, values)
222 runIds.append(c.lastrowid)
224 nextLine = logfile.readline().strip()
229 c.execute(
'PRAGMA table_info(progress)')
230 columnNames = [col[1]
for col
in c.fetchall()]
233 numProgressProperties = int(nextLine.split()[0])
234 progressPropertyNames = [
'runid']
235 for i
in range(numProgressProperties):
236 field = logfile.readline().split()
237 progressPropertyType = field[-1]
238 progressPropertyName =
"_".join(field[:-1])
239 if progressPropertyName
not in columnNames:
240 c.execute(
'ALTER TABLE progress ADD %s %s' %
241 (progressPropertyName, progressPropertyType))
242 progressPropertyNames.append(progressPropertyName)
244 insertFmtStr =
'INSERT INTO progress (' + \
245 ','.join(progressPropertyNames) +
') VALUES (' + \
246 ','.join(
'?'*len(progressPropertyNames)) +
')' 247 numRuns = int(logfile.readline().split()[0])
248 for j
in range(numRuns):
249 dataSeries = logfile.readline().split(
';')[:-1]
250 for dataSample
in dataSeries:
251 values = tuple([runIds[j]] + \
252 [
None if len(x) == 0
or x ==
'nan' or x ==
'inf' else x
253 for x
in dataSample.split(
',')[:-1]])
255 c.execute(insertFmtStr, values)
256 except sqlite3.IntegrityError:
257 print(
'Ignoring duplicate progress data. Consider increasing ompl::tools::Benchmark::Request::timeBetweenUpdates.')
266 """Create a plot for a particular attribute. It will include data for 267 all planners that have data for this attribute.""" 271 if typename ==
'ENUM':
272 cur.execute(
'SELECT description FROM enums where name IS "%s"' % attribute)
273 descriptions = [ t[0]
for t
in cur.fetchall() ]
274 numValues = len(descriptions)
275 for planner
in planners:
276 cur.execute(
'SELECT %s FROM runs WHERE plannerid = %s AND %s IS NOT NULL' \
277 % (attribute, planner[0], attribute))
278 measurement = [ t[0]
for t
in cur.fetchall()
if t[0] !=
None ]
279 if len(measurement) > 0:
280 cur.execute(
'SELECT count(*) FROM runs WHERE plannerid = %s AND %s IS NULL' \
281 % (planner[0], attribute))
282 nanCounts.append(cur.fetchone()[0])
283 labels.append(planner[1])
284 if typename ==
'ENUM':
285 scale = 100. / len(measurement)
286 measurements.append([measurement.count(i)*scale
for i
in range(numValues)])
288 measurements.append(measurement)
290 if len(measurements)==0:
291 print(
'Skipping "%s": no available measurements' % attribute)
296 if typename ==
'ENUM':
298 measurements = np.transpose(np.vstack(measurements))
299 colsum = np.sum(measurements, axis=1)
300 rows = np.where(colsum != 0)[0]
301 heights = np.zeros((1,measurements.shape[1]))
302 ind = range(measurements.shape[1])
305 plt.bar(ind, measurements[i], width, bottom=heights[0],
306 color=matplotlib.cm.hot(int(floor(i*256/numValues))),
307 label=descriptions[i])
308 heights = heights + measurements[i]
309 xtickNames = plt.xticks([x + width / 2.
for x
in ind], labels, rotation=30, fontsize=8, ha=
'right')
310 ax.set_ylabel(attribute.replace(
'_',
' ') +
' (%)')
311 box = ax.get_position()
312 ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
313 props = matplotlib.font_manager.FontProperties()
314 props.set_size(
'small')
315 ax.legend(loc=
'center left', bbox_to_anchor=(1, 0.5), prop = props)
316 elif typename ==
'BOOLEAN':
318 measurementsPercentage = [sum(m) * 100. / len(m)
for m
in measurements]
319 ind = range(len(measurements))
320 plt.bar(ind, measurementsPercentage, width)
325 xtickNames = plt.xticks([x + width / 2.
for x
in ind], labels, rotation=30, fontsize=8, ha=
'right')
326 ax.set_ylabel(attribute.replace(
'_',
' ') +
' (%)')
327 plt.subplots_adjust(bottom=0.3)
329 if int(matplotlibversion.split(
'.')[0])<1:
330 plt.boxplot(measurements, notch=0, sym=
'k+', vert=1, whis=1.5)
332 plt.boxplot(measurements, notch=0, sym=
'k+', vert=1, whis=1.5, bootstrap=1000)
333 ax.set_ylabel(attribute.replace(
'_',
' '))
342 xtickNames = plt.setp(ax,xticklabels=labels)
343 plt.setp(xtickNames, rotation=30, fontsize=8, ha=
'right')
344 for tick
in ax.xaxis.get_major_ticks():
345 tick.label.set_fontsize(8)
346 plt.subplots_adjust(bottom=0.3)
347 ax.set_xlabel(
'Motion planning algorithm', fontsize=12)
348 ax.yaxis.grid(
True, linestyle=
'-', which=
'major', color=
'lightgrey', alpha=0.5)
350 maxy = max([max(y)
for y
in measurements])
351 for i
in range(len(labels)):
352 x = i+width/2
if typename==
'BOOLEAN' else i+1
358 """Plot data for a single planner progress attribute. Will create an 359 average time-plot with error bars of the attribute over all runs for 362 import numpy.ma
as ma
366 ax.set_xlabel(
'time (s)')
367 ax.set_ylabel(attribute.replace(
'_',
' '))
369 for planner
in planners:
370 cur.execute(
"""SELECT count(progress.%s) FROM progress INNER JOIN runs 371 ON progress.runid = runs.id AND runs.plannerid=%s 372 AND progress.%s IS NOT NULL""" \
373 % (attribute, planner[0], attribute))
374 if cur.fetchone()[0] > 0:
375 plannerNames.append(planner[1])
376 cur.execute(
"""SELECT DISTINCT progress.runid FROM progress INNER JOIN runs 377 WHERE progress.runid=runs.id AND runs.plannerid=?""", (planner[0],))
378 runids = [t[0]
for t
in cur.fetchall()]
383 cur.execute(
'SELECT time, %s FROM progress WHERE runid = %s ORDER BY time' % (attribute,r))
384 (time, data) = zip(*(cur.fetchall()))
385 timeTable.append(time)
386 dataTable.append(data)
391 fewestSamples = min(len(time[:])
for time
in timeTable)
392 times = np.array(timeTable[0][:fewestSamples])
393 dataArrays = np.array([data[:fewestSamples]
for data
in dataTable])
394 filteredData = ma.masked_array(dataArrays, np.equal(dataArrays,
None), dtype=float)
396 means = np.mean(filteredData, axis=0)
397 stddevs = np.std(filteredData, axis=0, ddof=1)
400 plt.errorbar(times, means, yerr=2*stddevs, errorevery=max(1, len(times) // 20))
401 ax.legend(plannerNames)
402 if len(plannerNames)>0:
408 """Create a PDF file with box plots for all attributes.""" 409 print(
"Generating plots...")
410 conn = sqlite3.connect(dbname)
412 c.execute(
'PRAGMA FOREIGN_KEYS = ON')
413 c.execute(
'SELECT id, name FROM plannerConfigs')
414 planners = [(t[0],t[1].replace(
'geometric_',
'').replace(
'control_',
''))
415 for t
in c.fetchall()]
416 c.execute(
'PRAGMA table_info(runs)')
417 colInfo = c.fetchall()[3:]
421 if col[2] ==
'BOOLEAN' or col[2] ==
'ENUM' or \
422 col[2] ==
'INTEGER' or col[2] ==
'REAL':
424 pp.savefig(plt.gcf())
426 c.execute(
'PRAGMA table_info(progress)')
427 colInfo = c.fetchall()[2:]
430 pp.savefig(plt.gcf())
435 c.execute(
"""SELECT id, name, timelimit, memorylimit FROM experiments""")
436 experiments = c.fetchall()
437 for experiment
in experiments:
438 c.execute(
"""SELECT count(*) FROM runs WHERE runs.experimentid = %d 439 GROUP BY runs.plannerid""" % experiment[0])
440 numRuns = [run[0]
for run
in c.fetchall()]
441 numRuns = numRuns[0]
if len(set(numRuns)) == 1
else ','.join(numRuns)
443 plt.figtext(pagex, pagey,
'Experiment "%s"' % experiment[1])
444 plt.figtext(pagex, pagey-0.05,
'Number of averaged runs: %d' % numRuns)
445 plt.figtext(pagex, pagey-0.10,
"Time limit per run: %g seconds" % experiment[2])
446 plt.figtext(pagex, pagey-0.15,
"Memory limit per run: %g MB" % experiment[3])
449 pp.savefig(plt.gcf())
def saveAsMysql(dbname, mysqldump):
    """Convert the sqlite3 database dbname into a MySQL dump file.

    mysqldump -- output filename for the dump.
    """
    # See http://stackoverflow.com/questions/1067060/perl-to-python
    import re
    print("Saving as MySQL dump file...")

    conn = sqlite3.connect(dbname)
    mysqldump = open(mysqldump, 'w')

    # Drop tables in an order that keeps foreign keys valid:
    # dependents first, then the referenced tables listed in `last`.
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table'")
    table_names = [str(t[0]) for t in c.fetchall()]
    c.close()
    last = ['experiments', 'planner_configs']
    for table in table_names:
        if table.startswith("sqlite"):
            # Skip sqlite-internal bookkeeping tables.
            continue
        if not table in last:
            mysqldump.write("DROP TABLE IF EXISTS `%s`;\n" % table)
    for table in last:
        if table in table_names:
            mysqldump.write("DROP TABLE IF EXISTS `%s`;\n" % table)

    for line in conn.iterdump():
        # Filter out statements MySQL cannot or should not replay.
        process = False
        for nope in ('BEGIN TRANSACTION', 'COMMIT',
                     'sqlite_sequence', 'CREATE UNIQUE INDEX', 'CREATE VIEW'):
            if nope in line:
                break
        else:
            process = True
        if not process:
            continue
        line = re.sub(r"[\n\r\t ]+", " ", line)
        m = re.search('CREATE TABLE ([a-zA-Z0-9_]*)(.*)', line)
        if m:
            name, sub = m.groups()
            sub = sub.replace('"', '`')
            line = '''CREATE TABLE IF NOT EXISTS %(name)s%(sub)s'''
            line = line % dict(name=name, sub=sub)
            # Make sure we use an engine that supports foreign keys.
            line = line.rstrip("\n\t ;") + " ENGINE = InnoDB;\n"
        else:
            m = re.search('INSERT INTO "([a-zA-Z0-9_]*)"(.*)', line)
            if m:
                line = 'INSERT INTO %s%s\n' % m.groups()
                line = line.replace('"', r'\"')
                line = line.replace('"', "'")
            # Translate sqlite's 't'/'f' boolean literals to 1/0; the
            # placeholder round-trip avoids re-matching replaced text.
            line = re.sub(r"([^'])'t'(.)", "\\1THIS_IS_TRUE\\2", line)
            line = line.replace('THIS_IS_TRUE', '1')
            line = re.sub(r"([^'])'f'(.)", "\\1THIS_IS_FALSE\\2", line)
            line = line.replace('THIS_IS_FALSE', '0')
        line = line.replace('AUTOINCREMENT', 'AUTO_INCREMENT')
        mysqldump.write(line)
    mysqldump.close()
    conn.close()
def computeViews(dbname):
    """(Re)create the bestPlannerConfigsPerExperiment and bestPlannerConfigs
    views, ranking planner configurations by average success rate and
    average total time.

    NOTE(review): the enclosing def line was not visible in the mangled
    source; the name computeViews is inferred — confirm against callers.
    """
    conn = sqlite3.connect(dbname)
    c = conn.cursor()
    c.execute('PRAGMA FOREIGN_KEYS = ON')
    c.execute('PRAGMA table_info(runs)')
    # Kinodynamic logs may not have a simplification_time column, so only
    # add it to the total when it exists.
    if 'simplification_time' in [col[1] for col in c.fetchall()]:
        s0 = """SELECT plannerid, plannerConfigs.name AS plannerName, experimentid, solved, time + simplification_time AS total_time
            FROM plannerConfigs INNER JOIN experiments INNER JOIN runs
            ON plannerConfigs.id=runs.plannerid AND experiments.id=runs.experimentid"""
    else:
        s0 = """SELECT plannerid, plannerConfigs.name AS plannerName, experimentid, solved, time AS total_time
            FROM plannerConfigs INNER JOIN experiments INNER JOIN runs
            ON plannerConfigs.id=runs.plannerid AND experiments.id=runs.experimentid"""
    # Best configuration per experiment.
    s1 = """SELECT plannerid, plannerName, experimentid, AVG(solved) AS avg_solved, AVG(total_time) AS avg_total_time
        FROM (%s) GROUP BY plannerid, experimentid""" % s0
    s2 = """SELECT plannerid, experimentid, MIN(avg_solved) AS avg_solved, avg_total_time
        FROM (%s) GROUP BY plannerName, experimentid ORDER BY avg_solved DESC, avg_total_time ASC""" % s1
    c.execute('DROP VIEW IF EXISTS bestPlannerConfigsPerExperiment')
    c.execute('CREATE VIEW IF NOT EXISTS bestPlannerConfigsPerExperiment AS %s' % s2)
    # Best configuration across all experiments.
    s1 = """SELECT plannerid, plannerName, AVG(solved) AS avg_solved, AVG(total_time) AS avg_total_time
        FROM (%s) GROUP BY plannerid""" % s0
    s2 = """SELECT plannerid, MIN(avg_solved) AS avg_solved, avg_total_time
        FROM (%s) GROUP BY plannerName ORDER BY avg_solved DESC, avg_total_time ASC""" % s1
    c.execute('DROP VIEW IF EXISTS bestPlannerConfigs')
    c.execute('CREATE VIEW IF NOT EXISTS bestPlannerConfigs AS %s' % s2)
    conn.commit()
    c.close()
    conn.close()
if __name__ == "__main__":
    usage = """%prog [options] [<benchmark.log> ...]"""
    parser = OptionParser("A script to parse benchmarking results.\n" + usage)
    parser.add_option("-d", "--database", dest="dbname", default="benchmark.db",
        help="Filename of benchmark database [default: %default]")
    parser.add_option("-v", "--view", action="store_true", dest="view", default=False,
        help="Compute the views for best planner configurations")
    parser.add_option("-p", "--plot", dest="plot", default=None,
        help="Create a PDF of plots with the filename provided")
    parser.add_option("-m", "--mysql", dest="mysqldb", default=None,
        help="Save SQLite3 database as a MySQL dump file")
    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("No arguments were provided. Please provide full path of log file")

    # NOTE(review): the tail of this block was reconstructed; the original
    # lines after the argument-count guard were not visible — confirm the
    # exact dispatch order against VCS history.
    readBenchmarkLog(options.dbname, args)
    if options.view:
        computeViews(options.dbname)
    if options.plot:
        plotStatistics(options.dbname, options.plot)
    if options.mysqldb:
        saveAsMysql(options.dbname, options.mysqldb)
def readOptionalMultilineValue(filevar)
def plotAttribute(cur, planners, attribute, typename)
def plotStatistics(dbname, fname)
def readRequiredLogValue(name, filevar, desired_token_index, expected_tokens={})
def saveAsMysql(dbname, mysqldump)
def readOptionalLogValue(filevar, desired_token_index, expected_tokens={})
def readLogValue(filevar, desired_token_index, expected_tokens)
def readRequiredMultilineValue(filevar)
def readBenchmarkLog(dbname, filenames)
def ensurePrefix(line, prefix)
def plotProgressAttribute(cur, planners, attribute)