import sqlite3
from sys import argv, exit
from os.path import basename, splitext
from math import floor
from optparse import OptionParser, OptionGroup

import matplotlib
matplotlib.use('pdf')
from matplotlib import __version__ as matplotlibversion
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import numpy as np
def readLogValue(filevar, desired_token_index, expected_tokens):
    """Read one line from filevar, split it on whitespace, and return the
    token at desired_token_index.

    expected_tokens maps token indices to required string values; if any of
    them does not match (or the line has too few tokens), the read is undone
    by restoring the file position and None is returned.
    """
    start_pos = filevar.tell()
    tokens = filevar.readline().split()
    for token_index in expected_tokens:
        try:
            token = tokens[token_index]
        except IndexError:
            # line is shorter than expected; undo the read so the caller can retry
            filevar.seek(start_pos)
            return None
        if token != expected_tokens[token_index]:
            # unexpected token; undo the read so the caller can retry
            filevar.seek(start_pos)
            return None
    return tokens[desired_token_index]
66 return readLogValue(filevar, desired_token_index, expected_tokens)
def readRequiredLogValue(name, filevar, desired_token_index, expected_tokens=None):
    """Read a value that must be present.

    name is used only in the error message. Raises Exception when the value
    cannot be parsed from the current line.
    """
    result = readLogValue(filevar, desired_token_index,
                          expected_tokens if expected_tokens is not None else {})
    if result is None:
        raise Exception("Unable to read " + name)
    return result
def ensurePrefix(line, prefix):
    """Verify that line starts with prefix and return the line unchanged.

    Returning the line lets callers chain on the result
    (e.g. ensurePrefix(f.readline(), "Starting").split()).
    Raises Exception when the prefix is missing.
    """
    if not line.startswith(prefix):
        raise Exception("Expected prefix " + prefix + " was not found")
    return line
def readOptionalMultilineValue(filevar):
    """Read a multi-line value delimited by "<<<|" and "|>>>" marker lines.

    If the next line does not open the delimiter, the file position is
    restored and None is returned. Raises Exception when the closing
    delimiter is missing before EOF.
    """
    start_pos = filevar.tell()
    line = filevar.readline()
    if not line.startswith("<<<|"):
        # not a multiline value; undo the read
        filevar.seek(start_pos)
        return None
    value = ''
    line = filevar.readline()
    while not line.startswith('|>>>'):
        value = value + line
        line = filevar.readline()
        # readline() returns '' (never None) at EOF, so comparing against
        # None here would spin forever on a truncated file
        if not line:
            raise Exception("Expected token |>>> missing")
    return value
def readRequiredMultilineValue(filevar):
    """Read a multi-line value whose "<<<|" opening delimiter is mandatory.

    ensurePrefix raises when the opening marker is absent; Exception is
    raised when the closing "|>>>" marker is missing before EOF.
    """
    ensurePrefix(filevar.readline(), "<<<|")
    value = ''
    line = filevar.readline()
    while not line.startswith('|>>>'):
        value = value + line
        line = filevar.readline()
        # '' signals EOF; the original compared against None, which
        # readline() never returns, so a truncated file looped forever
        if not line:
            raise Exception("Expected token |>>> missing")
    return value
107 """Parse benchmark log files and store the parsed data in a sqlite3 database.""" 109 conn = sqlite3.connect(dbname)
111 c.execute(
'PRAGMA FOREIGN_KEYS = ON')
114 c.executescript(
"""CREATE TABLE IF NOT EXISTS experiments 115 (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(512), 116 totaltime REAL, timelimit REAL, memorylimit REAL, runcount INTEGER, 117 version VARCHAR(128), hostname VARCHAR(1024), cpuinfo TEXT, 118 date DATETIME, seed INTEGER, setup TEXT); 119 CREATE TABLE IF NOT EXISTS plannerConfigs 120 (id INTEGER PRIMARY KEY AUTOINCREMENT, 121 name VARCHAR(512) NOT NULL, settings TEXT); 122 CREATE TABLE IF NOT EXISTS enums 123 (name VARCHAR(512), value INTEGER, description TEXT, 124 PRIMARY KEY (name, value)); 125 CREATE TABLE IF NOT EXISTS runs 126 (id INTEGER PRIMARY KEY AUTOINCREMENT, experimentid INTEGER, plannerid INTEGER, 127 FOREIGN KEY (experimentid) REFERENCES experiments(id) ON DELETE CASCADE, 128 FOREIGN KEY (plannerid) REFERENCES plannerConfigs(id) ON DELETE CASCADE); 129 CREATE TABLE IF NOT EXISTS progress 130 (runid INTEGER, time REAL, PRIMARY KEY (runid, time), 131 FOREIGN KEY (runid) REFERENCES runs(id) ON DELETE CASCADE)""")
133 for filename
in filenames:
134 print(
'Processing ' + filename)
135 logfile = open(filename,
'r') 136 start_pos = logfile.tell() 140 logfile.seek(start_pos)
145 version =
' '.join([libname, version])
148 date =
' '.join(
ensurePrefix(logfile.readline(),
"Starting").split()[2:])
152 timelimit = float(
readRequiredLogValue(
"time limit", logfile, 0, {-3 :
"seconds", -2 :
"per", -1 :
"run"}))
153 memorylimit = float(
readRequiredLogValue(
"memory limit", logfile, 0, {-3 :
"MB", -2 :
"per", -1 :
"run"}))
156 if nrrunsOrNone !=
None:
157 nrruns = int(nrrunsOrNone)
158 totaltime = float(
readRequiredLogValue(
"total time", logfile, 0, {-3 :
"collect", -2 :
"the", -1 :
"data"}))
161 if numEnumsOrNone !=
None:
162 numEnums = int(numEnumsOrNone)
163 for i
in range(numEnums):
164 enum = logfile.readline()[:-1].split(
'|')
165 c.execute(
'SELECT * FROM enums WHERE name IS "%s"' % enum[0])
166 if c.fetchone() ==
None:
167 for j
in range(len(enum)-1):
168 c.execute(
'INSERT INTO enums VALUES (?,?,?)',
169 (enum[0],j,enum[j+1]))
170 c.execute(
'INSERT INTO experiments VALUES (?,?,?,?,?,?,?,?,?,?,?,?)',
171 (
None, expname, totaltime, timelimit, memorylimit, nrruns,
172 version, hostname, cpuinfo, date, rseed, expsetup) )
173 experimentId = c.lastrowid
175 for i
in range(numPlanners):
176 plannerName = logfile.readline()[:-1]
177 print(
'Parsing data for ' + plannerName)
180 numCommon = int(logfile.readline().split()[0])
182 for j
in range(numCommon):
183 settings = settings + logfile.readline() +
';' 186 c.execute(
'SELECT id FROM plannerConfigs WHERE (name=? AND settings=?)',
187 (plannerName, settings,))
190 c.execute(
'INSERT INTO plannerConfigs VALUES (?,?,?)',
191 (
None, plannerName, settings,))
192 plannerId = c.lastrowid
197 c.execute(
'PRAGMA table_info(runs)')
198 columnNames = [col[1]
for col
in c.fetchall()]
201 numProperties = int(logfile.readline().split()[0])
202 propertyNames = [
'experimentid',
'plannerid']
203 for j
in range(numProperties):
204 field = logfile.readline().split()
205 propertyType = field[-1]
206 propertyName =
'_'.join(field[:-1])
207 if propertyName
not in columnNames:
208 c.execute(
'ALTER TABLE runs ADD %s %s' % (propertyName, propertyType))
209 propertyNames.append(propertyName)
211 insertFmtStr =
'INSERT INTO runs (' +
','.join(propertyNames) + \
212 ') VALUES (' +
','.join(
'?'*len(propertyNames)) +
')' 213 numRuns = int(logfile.readline().split()[0])
215 for j
in range(numRuns):
216 values = tuple([experimentId, plannerId] + \
217 [
None if len(x) == 0
or x ==
'nan' or x ==
'inf' else x
218 for x
in logfile.readline().split(
'; ')[:-1]])
219 c.execute(insertFmtStr, values)
222 runIds.append(c.lastrowid)
224 nextLine = logfile.readline().strip()
229 c.execute(
'PRAGMA table_info(progress)')
230 columnNames = [col[1]
for col
in c.fetchall()]
233 numProgressProperties = int(nextLine.split()[0])
234 progressPropertyNames = [
'runid']
235 for i
in range(numProgressProperties):
236 field = logfile.readline().split()
237 progressPropertyType = field[-1]
238 progressPropertyName =
"_".join(field[:-1])
239 if progressPropertyName
not in columnNames:
240 c.execute(
'ALTER TABLE progress ADD %s %s' %
241 (progressPropertyName, progressPropertyType))
242 progressPropertyNames.append(progressPropertyName)
244 insertFmtStr =
'INSERT INTO progress (' + \
245 ','.join(progressPropertyNames) +
') VALUES (' + \
246 ','.join(
'?'*len(progressPropertyNames)) +
')' 247 numRuns = int(logfile.readline().split()[0])
248 for j
in range(numRuns):
249 dataSeries = logfile.readline().split(
';')[:-1]
250 for dataSample
in dataSeries:
251 values = tuple([runIds[j]] + \
252 [
None if len(x) == 0
or x ==
'nan' or x ==
'inf' else x
253 for x
in dataSample.split(
',')[:-1]])
255 c.execute(insertFmtStr, values)
256 except sqlite3.IntegrityError:
257 print(
'Ignoring duplicate progress data. Consider increasing ompl::tools::Benchmark::Request::timeBetweenUpdates.')
266 """Create a plot for a particular attribute. It will include data for 267 all planners that have data for this attribute.""" 271 if typename ==
'ENUM':
272 cur.execute(
'SELECT description FROM enums where name IS "%s"' % attribute)
273 descriptions = [ t[0]
for t
in cur.fetchall() ]
274 numValues = len(descriptions)
275 for planner
in planners:
276 cur.execute(
'SELECT %s FROM runs WHERE plannerid = %s AND %s IS NOT NULL' \
277 % (attribute, planner[0], attribute))
278 measurement = [ t[0]
for t
in cur.fetchall()
if t[0] !=
None ]
279 if len(measurement) > 0:
280 cur.execute(
'SELECT count(*) FROM runs WHERE plannerid = %s AND %s IS NULL' \
281 % (planner[0], attribute))
282 nanCounts.append(cur.fetchone()[0])
283 labels.append(planner[1])
284 if typename ==
'ENUM':
285 scale = 100. / len(measurement)
286 measurements.append([measurement.count(i)*scale
for i
in range(numValues)])
288 measurements.append(measurement)
290 if len(measurements)==0:
291 print(
'Skipping "%s": no available measurements' % attribute)
296 if typename ==
'ENUM':
298 measurements = np.transpose(np.vstack(measurements))
299 colsum = np.sum(measurements, axis=1)
300 rows = np.where(colsum != 0)[0]
301 heights = np.zeros((1,measurements.shape[1]))
302 ind = range(measurements.shape[1])
305 plt.bar(ind, measurements[i], width, bottom=heights[0],
306 color=matplotlib.cm.hot(int(floor(i*256/numValues))),
307 label=descriptions[i])
308 heights = heights + measurements[i]
309 xtickNames = plt.xticks([x+width/2.
for x
in ind], labels, rotation=30)
310 ax.set_ylabel(attribute.replace(
'_',
' ') +
' (%)')
311 box = ax.get_position()
312 ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
313 props = matplotlib.font_manager.FontProperties()
314 props.set_size(
'small')
315 ax.legend(loc=
'center left', bbox_to_anchor=(1, 0.5), prop = props)
316 elif typename ==
'BOOLEAN':
318 measurementsPercentage = [sum(m) * 100. / len(m)
for m
in measurements]
319 ind = range(len(measurements))
320 plt.bar(ind, measurementsPercentage, width)
321 xtickNames = plt.xticks([x + width / 2.
for x
in ind], labels, rotation=30, fontsize=8)
322 ax.set_ylabel(attribute.replace(
'_',
' ') +
' (%)')
323 plt.subplots_adjust(bottom=0.3)
325 if int(matplotlibversion.split(
'.')[0])<1:
326 plt.boxplot(measurements, notch=0, sym=
'k+', vert=1, whis=1.5)
328 plt.boxplot(measurements, notch=0, sym=
'k+', vert=1, whis=1.5, bootstrap=1000)
329 ax.set_ylabel(attribute.replace(
'_',
' '))
334 xtickNames = plt.setp(ax,xticklabels=labels)
335 plt.setp(xtickNames, rotation=30)
336 for tick
in ax.xaxis.get_major_ticks():
337 tick.label.set_fontsize(8)
338 plt.subplots_adjust(bottom=0.3)
339 ax.set_xlabel(
'Motion planning algorithm')
340 ax.yaxis.grid(
True, linestyle=
'-', which=
'major', color=
'lightgrey', alpha=0.5)
342 maxy = max([max(y)
for y
in measurements])
343 for i
in range(len(labels)):
344 x = i+width/2
if typename==
'BOOLEAN' else i+1
345 ax.text(x, .95*maxy, str(nanCounts[i]), horizontalalignment=
'center', size=
'small')
349 """Plot data for a single planner progress attribute. Will create an 350 average time-plot with error bars of the attribute over all runs for 353 import numpy.ma
as ma
357 ax.set_xlabel(
'time (s)')
358 ax.set_ylabel(attribute.replace(
'_',
' '))
360 for planner
in planners:
361 cur.execute(
"""SELECT count(progress.%s) FROM progress INNER JOIN runs 362 ON progress.runid = runs.id AND runs.plannerid=%s 363 AND progress.%s IS NOT NULL""" \
364 % (attribute, planner[0], attribute))
365 if cur.fetchone()[0] > 0:
366 plannerNames.append(planner[1])
367 cur.execute(
"""SELECT DISTINCT progress.runid FROM progress INNER JOIN runs 368 WHERE progress.runid=runs.id AND runs.plannerid=?""", (planner[0],))
369 runids = [t[0]
for t
in cur.fetchall()]
374 cur.execute(
'SELECT time, %s FROM progress WHERE runid = %s ORDER BY time' % (attribute,r))
375 (time, data) = zip(*(cur.fetchall()))
376 timeTable.append(time)
377 dataTable.append(data)
382 fewestSamples = min(len(time[:])
for time
in timeTable)
383 times = np.array(timeTable[0][:fewestSamples])
384 dataArrays = np.array([data[:fewestSamples]
for data
in dataTable])
385 filteredData = ma.masked_array(dataArrays, np.equal(dataArrays,
None), dtype=float)
387 means = np.mean(filteredData, axis=0)
388 stddevs = np.std(filteredData, axis=0, ddof=1)
391 plt.errorbar(times, means, yerr=2*stddevs, errorevery=max(1, len(times) // 20))
392 ax.legend(plannerNames)
393 if len(plannerNames)>0:
399 """Create a PDF file with box plots for all attributes.""" 400 print(
"Generating plots...")
401 conn = sqlite3.connect(dbname)
403 c.execute(
'PRAGMA FOREIGN_KEYS = ON')
404 c.execute(
'SELECT id, name FROM plannerConfigs')
405 planners = [(t[0],t[1].replace(
'geometric_',
'').replace(
'control_',
''))
406 for t
in c.fetchall()]
407 c.execute(
'PRAGMA table_info(runs)')
408 colInfo = c.fetchall()[3:]
412 if col[2] ==
'BOOLEAN' or col[2] ==
'ENUM' or \
413 col[2] ==
'INTEGER' or col[2] ==
'REAL':
415 pp.savefig(plt.gcf())
417 c.execute(
'PRAGMA table_info(progress)')
418 colInfo = c.fetchall()[2:]
421 pp.savefig(plt.gcf())
426 c.execute(
"""SELECT id, name, timelimit, memorylimit FROM experiments""")
427 experiments = c.fetchall()
428 for experiment
in experiments:
429 c.execute(
"""SELECT count(*) FROM runs WHERE runs.experimentid = %d 430 GROUP BY runs.plannerid""" % experiment[0])
431 numRuns = [run[0]
for run
in c.fetchall()]
432 numRuns = numRuns[0]
if len(set(numRuns)) == 1
else ','.join(numRuns)
434 plt.figtext(pagex, pagey,
'Experiment "%s"' % experiment[1])
435 plt.figtext(pagex, pagey-0.05,
'Number of averaged runs: %d' % numRuns)
436 plt.figtext(pagex, pagey-0.10,
"Time limit per run: %g seconds" % experiment[2])
437 plt.figtext(pagex, pagey-0.15,
"Memory limit per run: %g MB" % experiment[3])
440 pp.savefig(plt.gcf())
def saveAsMysql(dbname, mysqldump):
    """Convert the sqlite3 database dbname into a MySQL dump file named by
    mysqldump.

    Table creation statements are rewritten to use InnoDB (for foreign-key
    support), AUTOINCREMENT becomes AUTO_INCREMENT, and sqlite's 't'/'f'
    boolean literals become 1/0.
    """
    # See: http://stackoverflow.com/questions/1067060/perl-to-python
    import re
    print("Saving as MySQL dump file...")

    conn = sqlite3.connect(dbname)
    # 'with' guarantees the dump file is flushed and closed
    with open(mysqldump, 'w') as dumpfile:
        # make sure all tables are dropped in an order that keeps foreign
        # keys valid: drop the referenced tables in `last` after the others
        c = conn.cursor()
        c.execute("SELECT name FROM sqlite_master WHERE type='table'")
        table_names = [str(t[0]) for t in c.fetchall()]
        c.close()

        last = ['experiments', 'planner_configs']
        for table in table_names:
            if table.startswith("sqlite"):
                continue
            if table not in last:
                dumpfile.write("DROP TABLE IF EXISTS `%s`;\n" % table)
        for table in last:
            if table in table_names:
                dumpfile.write("DROP TABLE IF EXISTS `%s`;\n" % table)

        for line in conn.iterdump():
            # skip statements that have no MySQL equivalent
            process = True
            for nope in ('BEGIN TRANSACTION', 'COMMIT',
                         'sqlite_sequence', 'CREATE UNIQUE INDEX', 'CREATE VIEW'):
                if nope in line:
                    process = False
                    break
            if not process:
                continue
            line = re.sub(r"[\n\r\t ]+", " ", line)
            m = re.search('CREATE TABLE ([a-zA-Z0-9_]*)(.*)', line)
            if m:
                name, sub = m.groups()
                sub = sub.replace('"', '`')
                line = '''CREATE TABLE IF NOT EXISTS %(name)s%(sub)s'''
                line = line % dict(name=name, sub=sub)
                # make sure we use an engine that supports foreign keys
                line = line.rstrip("\n\t ;") + " ENGINE = InnoDB;\n"
            else:
                m = re.search('INSERT INTO "([a-zA-Z0-9_]*)"(.*)', line)
                if m:
                    line = 'INSERT INTO %s%s\n' % m.groups()
                    line = line.replace('"', r'\"')
                    line = line.replace('"', "'")
            # sqlite stores booleans as 't'/'f'; MySQL wants 1/0
            line = re.sub(r"([^'])'t'(.)", "\\1THIS_IS_TRUE\\2", line)
            line = line.replace('THIS_IS_TRUE', '1')
            line = re.sub(r"([^'])'f'(.)", "\\1THIS_IS_FALSE\\2", line)
            line = line.replace('THIS_IS_FALSE', '0')
            line = line.replace('AUTOINCREMENT', 'AUTO_INCREMENT')
            dumpfile.write(line)
def computeViews(dbname):
    """(Re)create the bestPlannerConfigs and bestPlannerConfigsPerExperiment
    views, which rank planner configurations by average success rate and
    average total solve time."""
    conn = sqlite3.connect(dbname)
    c = conn.cursor()
    c.execute('PRAGMA FOREIGN_KEYS = ON')
    c.execute('PRAGMA table_info(runs)')
    # kinodynamic paths cannot be simplified (or at least not easily),
    # so simplification_time may not exist as a database column
    if 'simplification_time' in [col[1] for col in c.fetchall()]:
        s0 = """SELECT plannerid, plannerConfigs.name AS plannerName, experimentid, solved, time + simplification_time AS total_time
            FROM plannerConfigs INNER JOIN experiments INNER JOIN runs
            ON plannerConfigs.id=runs.plannerid AND experiments.id=runs.experimentid"""
    else:
        s0 = """SELECT plannerid, plannerConfigs.name AS plannerName, experimentid, solved, time AS total_time
            FROM plannerConfigs INNER JOIN experiments INNER JOIN runs
            ON plannerConfigs.id=runs.plannerid AND experiments.id=runs.experimentid"""
    # per-experiment ranking
    s1 = """SELECT plannerid, plannerName, experimentid, AVG(solved) AS avg_solved, AVG(total_time) AS avg_total_time
        FROM (%s) GROUP BY plannerid, experimentid""" % s0
    s2 = """SELECT plannerid, experimentid, MIN(avg_solved) AS avg_solved, avg_total_time
        FROM (%s) GROUP BY plannerName, experimentid ORDER BY avg_solved DESC, avg_total_time ASC""" % s1
    c.execute('DROP VIEW IF EXISTS bestPlannerConfigsPerExperiment')
    c.execute('CREATE VIEW IF NOT EXISTS bestPlannerConfigsPerExperiment AS %s' % s2)

    # overall ranking across all experiments
    s1 = """SELECT plannerid, plannerName, AVG(solved) AS avg_solved, AVG(total_time) AS avg_total_time
        FROM (%s) GROUP BY plannerid""" % s0
    s2 = """SELECT plannerid, MIN(avg_solved) AS avg_solved, avg_total_time
        FROM (%s) GROUP BY plannerName ORDER BY avg_solved DESC, avg_total_time ASC""" % s1
    c.execute('DROP VIEW IF EXISTS bestPlannerConfigs')
    c.execute('CREATE VIEW IF NOT EXISTS bestPlannerConfigs AS %s' % s2)

    conn.commit()
    c.close()
if __name__ == "__main__":
    usage = """%prog [options] [<benchmark.log> ...]"""
    parser = OptionParser("A script to parse benchmarking results.\n" + usage)
    parser.add_option("-d", "--database", dest="dbname", default="benchmark.db",
        help="Filename of benchmark database [default: %default]")
    parser.add_option("-v", "--view", action="store_true", dest="view", default=False,
        help="Compute the views for best planner configurations")
    parser.add_option("-p", "--plot", dest="plot", default=None,
        help="Create a PDF of plots with the filename provided")
    parser.add_option("-m", "--mysql", dest="mysqldb", default=None,
        help="Save SQLite3 database as a MySQL dump file")
    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("No arguments were provided. Please provide full path of log file")

    readBenchmarkLog(options.dbname, args)
    # after updating the database, recompute the views as well
    options.view = True

    if options.view:
        computeViews(options.dbname)

    if options.plot:
        plotStatistics(options.dbname, options.plot)

    if options.mysqldb:
        saveAsMysql(options.dbname, options.mysqldb)
# Function index (signatures defined above):
#   readLogValue(filevar, desired_token_index, expected_tokens)
#   readOptionalLogValue(filevar, desired_token_index, expected_tokens={})
#   readRequiredLogValue(name, filevar, desired_token_index, expected_tokens={})
#   ensurePrefix(line, prefix)
#   readOptionalMultilineValue(filevar)
#   readRequiredMultilineValue(filevar)
#   readBenchmarkLog(dbname, filenames)
#   plotAttribute(cur, planners, attribute, typename)
#   plotProgressAttribute(cur, planners, attribute)
#   plotStatistics(dbname, fname)
#   saveAsMysql(dbname, mysqldump)