1 """report.py - Utilities for reporting statistics about benchmark results
from scipy.stats import mannwhitneyu
19 return '%s%r' % (self.__class__.__name__,
40 UTEST_MIN_REPETITIONS = 2
41 UTEST_OPTIMAL_REPETITIONS = 9
42 UTEST_COL_NAME =
"_pvalue"
47 Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
48 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
49 is False then all color codes in 'args' and 'kwargs' are replaced with
52 assert use_color
is True or use_color
is False
54 args = [arg
if not isinstance(arg, BenchmarkColor)
else BC_NONE
56 kwargs = {key: arg
if not isinstance(arg, BenchmarkColor)
else BC_NONE
57 for key, arg
in kwargs.items()}
58 return fmt_str.format(*args, **kwargs)
63 Return the length of the longest benchmark name in a given list of
64 benchmark JSON objects
67 for bc
in benchmark_list:
68 if len(bc[
'name']) > longest_name:
69 longest_name =
len(bc[
'name'])
75 Return a float representing the decimal change between old_val and new_val.
77 if old_val == 0
and new_val == 0:
80 return float(new_val - old_val) / (float(old_val + new_val) / 2)
81 return float(new_val - old_val) / abs(old_val)
86 Apply a filter to the json, and only leave the 'family' of benchmarks.
88 regex = re.compile(family)
90 filtered[
'benchmarks'] = []
91 for be
in json_orig[
'benchmarks']:
92 if not regex.search(be[
'name']):
94 filteredbench = copy.deepcopy(be)
95 filteredbench[
'name'] = regex.sub(replacement, filteredbench[
'name'])
96 filtered[
'benchmarks'].append(filteredbench)
102 While *keeping* the order, give all the unique 'names' used for benchmarks.
105 uniqued = [x[
'name']
for x
in json[
'benchmarks']
106 if x[
'name']
not in seen
and
107 (seen.add(x[
'name'])
or True)]
113 Given two lists, get a new list consisting of the elements only contained
114 in *both of the input lists*, while preserving the ordering.
116 return [x
for x
in list1
if x
in list2]
120 return (
'time_unit' in x
and 'real_time' in x
and 'cpu_time' in x)
125 While preserving the ordering, find benchmarks with the same names in
126 both of the inputs, and group them.
127 (i.e. partition/filter into groups with common name)
131 names =
intersect(json1_unique_names, json2_unique_names)
137 for x
in json1[
'benchmarks']:
139 time_unit = x[
'time_unit']
141 if time_unit
is None:
145 lhs = [x
for x
in json1[
'benchmarks']
if x[
'name'] == name
and
146 x[
'time_unit'] == time_unit]
147 rhs = [x
for x
in json2[
'benchmarks']
if x[
'name'] == name
and
148 x[
'time_unit'] == time_unit]
149 partitions.append([lhs, rhs])
155 lhs = [x[field_name]
for x
in partition[0]]
156 rhs = [x[field_name]
for x
in partition[1]]
161 min_rep_cnt =
min(
len(timings_time[0]),
162 len(timings_time[1]),
167 if min_rep_cnt < UTEST_MIN_REPETITIONS:
168 return False,
None,
None
170 time_pvalue = mannwhitneyu(
171 timings_time[0], timings_time[1], alternative=
'two-sided').pvalue
172 cpu_pvalue = mannwhitneyu(
173 timings_cpu[0], timings_cpu[1], alternative=
'two-sided').pvalue
175 return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
177 def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
178 def get_utest_color(pval):
179 return BC_FAIL
if pval >= utest_alpha
else BC_OKGREEN
182 if not utest[
'have_optimal_repetitions']
and utest[
'cpu_pvalue']
is None and utest[
'time_pvalue']
is None:
185 dsc =
"U Test, Repetitions: {} vs {}".
format(
186 utest[
'nr_of_repetitions'], utest[
'nr_of_repetitions_other'])
187 dsc_color = BC_OKGREEN
190 if not utest[
'have_optimal_repetitions']:
191 dsc_color = BC_WARNING
192 dsc +=
". WARNING: Results unreliable! {}+ repetitions recommended.".
format(
193 UTEST_OPTIMAL_REPETITIONS)
195 special_str =
"{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"
200 "{}{}".
format(bc_name, UTEST_COL_NAME),
203 utest[
'time_pvalue']), utest[
'time_pvalue'],
205 utest[
'cpu_pvalue']), utest[
'cpu_pvalue'],
215 Calculate and report the difference between each test of two benchmarks
216 runs specified as 'json1' and 'json2'. Output is another json containing
217 relevant details for each test run.
219 assert utest
is True or utest
is False
223 for partition
in partitions:
224 benchmark_name = partition[0][0][
'name']
225 time_unit = partition[0][0][
'time_unit']
231 other_bench = partition[1][i]
232 measurements.append({
233 'real_time': bn[
'real_time'],
234 'cpu_time': bn[
'cpu_time'],
235 'real_time_other': other_bench[
'real_time'],
236 'cpu_time_other': other_bench[
'cpu_time'],
245 have_optimal_repetitions, cpu_pvalue, time_pvalue =
calc_utest(timings_cpu, timings_time)
246 if cpu_pvalue
and time_pvalue:
248 'have_optimal_repetitions': have_optimal_repetitions,
249 'cpu_pvalue': cpu_pvalue,
250 'time_pvalue': time_pvalue,
251 'nr_of_repetitions':
len(timings_cpu[0]),
252 'nr_of_repetitions_other':
len(timings_cpu[1])
260 run_type = partition[0][0][
'run_type']
if 'run_type' in partition[0][0]
else ''
261 aggregate_name = partition[0][0][
'aggregate_name']
if run_type ==
'aggregate' and 'aggregate_name' in partition[0][0]
else ''
263 'name': benchmark_name,
264 'measurements': measurements,
265 'time_unit': time_unit,
266 'run_type': run_type,
267 'aggregate_name': aggregate_name,
268 'utest': utest_results
276 include_aggregates_only=False,
281 Calculate and report the difference between each test of two benchmarks
282 runs specified as 'json1' and 'json2'.
284 assert utest
is True or utest
is False
295 first_col_width =
max(
298 first_col_width +=
len(UTEST_COL_NAME)
299 first_line =
"{:<{}s}Time CPU Time Old Time New CPU Old CPU New".
format(
300 'Benchmark', 12 + first_col_width)
301 output_strs = [first_line,
'-' *
len(first_line)]
303 fmt_str =
"{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
304 for benchmark
in json_diff_report:
307 if not include_aggregates_only
or not 'run_type' in benchmark
or benchmark[
'run_type'] ==
'aggregate':
308 for measurement
in benchmark[
'measurements']:
314 get_color(measurement[
'time']),
316 get_color(measurement[
'cpu']),
318 measurement[
'real_time'],
319 measurement[
'real_time_other'],
320 measurement[
'cpu_time'],
321 measurement[
'cpu_time_other'],
327 if utest
and benchmark[
'utest']:
330 utest_alpha=utest_alpha,
331 first_col_width=first_col_width,
344 testInputs = os.path.join(
346 os.path.realpath(__file__)),
348 testOutput = os.path.join(testInputs,
'test3_run0.json')
349 with open(testOutput,
'r')
as f:
363 print(
"\n".join(output_lines))
364 self.assertEqual(
len(output_lines),
len(expect_lines))
365 for i
in range(0,
len(output_lines)):
366 self.assertEqual(expect_lines[i], output_lines[i])
374 testInputs = os.path.join(
376 os.path.realpath(__file__)),
378 testOutput1 = os.path.join(testInputs,
'test1_run1.json')
379 testOutput2 = os.path.join(testInputs,
'test1_run2.json')
380 with open(testOutput1,
'r')
as f:
382 with open(testOutput2,
'r')
as f:
391 [
'BM_SameTimes',
'+0.0000',
'+0.0000',
'10',
'10',
'10',
'10'],
392 [
'BM_2xFaster',
'-0.5000',
'-0.5000',
'50',
'25',
'50',
'25'],
393 [
'BM_2xSlower',
'+1.0000',
'+1.0000',
'50',
'100',
'50',
'100'],
394 [
'BM_1PercentFaster',
'-0.0100',
'-0.0100',
'100',
'99',
'100',
'99'],
395 [
'BM_1PercentSlower',
'+0.0100',
'+0.0100',
'100',
'101',
'100',
'101'],
396 [
'BM_10PercentFaster',
'-0.1000',
'-0.1000',
'100',
'90',
'100',
'90'],
397 [
'BM_10PercentSlower',
'+0.1000',
'+0.1000',
'100',
'110',
'100',
'110'],
398 [
'BM_100xSlower',
'+99.0000',
'+99.0000',
399 '100',
'10000',
'100',
'10000'],
400 [
'BM_100xFaster',
'-0.9900',
'-0.9900',
401 '10000',
'100',
'10000',
'100'],
402 [
'BM_10PercentCPUToTime',
'+0.1000',
403 '-0.1000',
'100',
'110',
'100',
'90'],
404 [
'BM_ThirdFaster',
'-0.3333',
'-0.3334',
'100',
'67',
'100',
'67'],
405 [
'BM_NotBadTimeUnit',
'-0.9000',
'+0.2000',
'0',
'0',
'0',
'1'],
409 output_lines = output_lines_with_header[2:]
411 print(
"\n".join(output_lines_with_header))
412 self.assertEqual(
len(output_lines),
len(expect_lines))
413 for i
in range(0,
len(output_lines)):
414 parts = [x
for x
in output_lines[i].
split(
' ')
if x]
415 self.assertEqual(
len(parts), 7)
416 self.assertEqual(expect_lines[i], parts)
421 'name':
'BM_SameTimes',
422 'measurements': [{
'time': 0.0000,
'cpu': 0.0000,
'real_time': 10,
'real_time_other': 10,
'cpu_time': 10,
'cpu_time_other': 10}],
427 'name':
'BM_2xFaster',
428 'measurements': [{
'time': -0.5000,
'cpu': -0.5000,
'real_time': 50,
'real_time_other': 25,
'cpu_time': 50,
'cpu_time_other': 25}],
433 'name':
'BM_2xSlower',
434 'measurements': [{
'time': 1.0000,
'cpu': 1.0000,
'real_time': 50,
'real_time_other': 100,
'cpu_time': 50,
'cpu_time_other': 100}],
439 'name':
'BM_1PercentFaster',
440 'measurements': [{
'time': -0.0100,
'cpu': -0.0100,
'real_time': 100,
'real_time_other': 98.9999999,
'cpu_time': 100,
'cpu_time_other': 98.9999999}],
445 'name':
'BM_1PercentSlower',
446 'measurements': [{
'time': 0.0100,
'cpu': 0.0100,
'real_time': 100,
'real_time_other': 101,
'cpu_time': 100,
'cpu_time_other': 101}],
451 'name':
'BM_10PercentFaster',
452 'measurements': [{
'time': -0.1000,
'cpu': -0.1000,
'real_time': 100,
'real_time_other': 90,
'cpu_time': 100,
'cpu_time_other': 90}],
457 'name':
'BM_10PercentSlower',
458 'measurements': [{
'time': 0.1000,
'cpu': 0.1000,
'real_time': 100,
'real_time_other': 110,
'cpu_time': 100,
'cpu_time_other': 110}],
463 'name':
'BM_100xSlower',
464 'measurements': [{
'time': 99.0000,
'cpu': 99.0000,
'real_time': 100,
'real_time_other': 10000,
'cpu_time': 100,
'cpu_time_other': 10000}],
469 'name':
'BM_100xFaster',
470 'measurements': [{
'time': -0.9900,
'cpu': -0.9900,
'real_time': 10000,
'real_time_other': 100,
'cpu_time': 10000,
'cpu_time_other': 100}],
475 'name':
'BM_10PercentCPUToTime',
476 'measurements': [{
'time': 0.1000,
'cpu': -0.1000,
'real_time': 100,
'real_time_other': 110,
'cpu_time': 100,
'cpu_time_other': 90}],
481 'name':
'BM_ThirdFaster',
482 'measurements': [{
'time': -0.3333,
'cpu': -0.3334,
'real_time': 100,
'real_time_other': 67,
'cpu_time': 100,
'cpu_time_other': 67}],
487 'name':
'BM_NotBadTimeUnit',
488 'measurements': [{
'time': -0.9000,
'cpu': 0.2000,
'real_time': 0.4,
'real_time_other': 0.04,
'cpu_time': 0.5,
'cpu_time_other': 0.6}],
494 for out, expected
in zip(
496 self.assertEqual(out[
'name'], expected[
'name'])
497 self.assertEqual(out[
'time_unit'], expected[
'time_unit'])
507 testInputs = os.path.join(
509 os.path.realpath(__file__)),
511 testOutput = os.path.join(testInputs,
'test2_run.json')
512 with open(testOutput,
'r')
as f:
523 [
'.',
'-0.5000',
'-0.5000',
'10',
'5',
'10',
'5'],
524 [
'./4',
'-0.5000',
'-0.5000',
'40',
'20',
'40',
'20'],
525 [
'Prefix/.',
'-0.5000',
'-0.5000',
'20',
'10',
'20',
'10'],
526 [
'Prefix/./3',
'-0.5000',
'-0.5000',
'30',
'15',
'30',
'15'],
530 output_lines = output_lines_with_header[2:]
532 print(
"\n".join(output_lines_with_header))
533 self.assertEqual(
len(output_lines),
len(expect_lines))
534 for i
in range(0,
len(output_lines)):
535 parts = [x
for x
in output_lines[i].
split(
' ')
if x]
536 self.assertEqual(
len(parts), 7)
537 self.assertEqual(expect_lines[i], parts)
543 'measurements': [{
'time': -0.5,
'cpu': -0.5,
'real_time': 10,
'real_time_other': 5,
'cpu_time': 10,
'cpu_time_other': 5}],
549 'measurements': [{
'time': -0.5,
'cpu': -0.5,
'real_time': 40,
'real_time_other': 20,
'cpu_time': 40,
'cpu_time_other': 20}],
555 'measurements': [{
'time': -0.5,
'cpu': -0.5,
'real_time': 20,
'real_time_other': 10,
'cpu_time': 20,
'cpu_time_other': 10}],
560 'name':
u'Prefix/./3',
561 'measurements': [{
'time': -0.5,
'cpu': -0.5,
'real_time': 30,
'real_time_other': 15,
'cpu_time': 30,
'cpu_time_other': 15}],
567 for out, expected
in zip(
569 self.assertEqual(out[
'name'], expected[
'name'])
570 self.assertEqual(out[
'time_unit'], expected[
'time_unit'])
580 testInputs = os.path.join(
582 os.path.realpath(__file__)),
584 testOutput1 = os.path.join(testInputs,
'test3_run0.json')
585 testOutput2 = os.path.join(testInputs,
'test3_run1.json')
586 with open(testOutput1,
'r')
as f:
588 with open(testOutput2,
'r')
as f:
592 json1, json2 = load_results()
594 json1, json2, utest=
True)
598 [
'BM_One',
'-0.1000',
'+0.1000',
'10',
'9',
'100',
'110'],
599 [
'BM_Two',
'+0.1111',
'-0.0111',
'9',
'10',
'90',
'89'],
600 [
'BM_Two',
'-0.1250',
'-0.1628',
'8',
'7',
'86',
'72'],
616 [
'short',
'-0.1250',
'-0.0625',
'8',
'7',
'80',
'75'],
617 [
'short',
'-0.4325',
'-0.1351',
'8',
'5',
'77',
'67'],
633 [
'medium',
'-0.3750',
'-0.3375',
'8',
'5',
'80',
'53'],
637 output_lines = output_lines_with_header[2:]
639 print(
"\n".join(output_lines_with_header))
640 self.assertEqual(
len(output_lines),
len(expect_lines))
641 for i
in range(0,
len(output_lines)):
642 parts = [x
for x
in output_lines[i].
split(
' ')
if x]
643 self.assertEqual(expect_lines[i], parts)
647 [
'BM_One',
'-0.1000',
'+0.1000',
'10',
'9',
'100',
'110'],
663 [
'short',
'-0.1250',
'-0.0625',
'8',
'7',
'80',
'75'],
664 [
'short',
'-0.4325',
'-0.1351',
'8',
'5',
'77',
'67'],
682 self.
json_diff_report, include_aggregates_only=
True, utest=
True, utest_alpha=0.05, use_color=
False)
683 output_lines = output_lines_with_header[2:]
685 print(
"\n".join(output_lines_with_header))
686 self.assertEqual(
len(output_lines),
len(expect_lines))
687 for i
in range(0,
len(output_lines)):
688 parts = [x
for x
in output_lines[i].
split(
' ')
if x]
689 self.assertEqual(expect_lines[i], parts)
699 'real_time_other': 9,
701 'cpu_time_other': 110}
709 {
'time': 0.1111111111111111,
710 'cpu': -0.011111111111111112,
712 'real_time_other': 10,
714 'cpu_time_other': 89},
715 {
'time': -0.125,
'cpu': -0.16279069767441862,
'real_time': 8,
716 'real_time_other': 7,
'cpu_time': 86,
'cpu_time_other': 72}
720 'have_optimal_repetitions':
False,
'cpu_pvalue': 0.6666666666666666,
'time_pvalue': 1.0
729 'real_time_other': 7,
731 'cpu_time_other': 75},
733 'cpu': -0.13506493506493514,
735 'real_time_other': 4.54,
737 'cpu_time_other': 66.6}
741 'have_optimal_repetitions':
False,
'cpu_pvalue': 0.2,
'time_pvalue': 0.7670968684102772
750 'real_time_other': 5,
752 'cpu_time_other': 53}
759 for out, expected
in zip(
761 self.assertEqual(out[
'name'], expected[
'name'])
762 self.assertEqual(out[
'time_unit'], expected[
'time_unit'])
773 testInputs = os.path.join(
775 os.path.realpath(__file__)),
777 testOutput1 = os.path.join(testInputs,
'test3_run0.json')
778 testOutput2 = os.path.join(testInputs,
'test3_run1.json')
779 with open(testOutput1,
'r')
as f:
781 with open(testOutput2,
'r')
as f:
785 json1, json2 = load_results()
787 json1, json2, utest=
True)
791 [
'BM_One',
'-0.1000',
'+0.1000',
'10',
'9',
'100',
'110'],
792 [
'BM_Two',
'+0.1111',
'-0.0111',
'9',
'10',
'90',
'89'],
793 [
'BM_Two',
'-0.1250',
'-0.1628',
'8',
'7',
'86',
'72'],
809 [
'short',
'-0.1250',
'-0.0625',
'8',
'7',
'80',
'75'],
810 [
'short',
'-0.4325',
'-0.1351',
'8',
'5',
'77',
'67'],
826 [
'medium',
'-0.3750',
'-0.3375',
'8',
'5',
'80',
'53']
830 utest=
True, utest_alpha=0.05, use_color=
False)
831 output_lines = output_lines_with_header[2:]
833 print(
"\n".join(output_lines_with_header))
834 self.assertEqual(
len(output_lines),
len(expect_lines))
835 for i
in range(0,
len(output_lines)):
836 parts = [x
for x
in output_lines[i].
split(
' ')
if x]
837 self.assertEqual(expect_lines[i], parts)
847 'real_time_other': 9,
849 'cpu_time_other': 110}
857 {
'time': 0.1111111111111111,
858 'cpu': -0.011111111111111112,
860 'real_time_other': 10,
862 'cpu_time_other': 89},
863 {
'time': -0.125,
'cpu': -0.16279069767441862,
'real_time': 8,
864 'real_time_other': 7,
'cpu_time': 86,
'cpu_time_other': 72}
868 'have_optimal_repetitions':
False,
'cpu_pvalue': 0.6666666666666666,
'time_pvalue': 1.0
877 'real_time_other': 7,
879 'cpu_time_other': 75},
881 'cpu': -0.13506493506493514,
883 'real_time_other': 4.54,
885 'cpu_time_other': 66.6}
889 'have_optimal_repetitions':
False,
'cpu_pvalue': 0.2,
'time_pvalue': 0.7670968684102772
895 {
'real_time_other': 5,
899 'cpu_time_other': 53,
909 for out, expected
in zip(
911 self.assertEqual(out[
'name'], expected[
'name'])
912 self.assertEqual(out[
'time_unit'], expected[
'time_unit'])
924 testInputs = os.path.join(
926 os.path.realpath(__file__)),
928 testOutput1 = os.path.join(testInputs,
'test4_run0.json')
929 testOutput2 = os.path.join(testInputs,
'test4_run1.json')
930 with open(testOutput1,
'r')
as f:
932 with open(testOutput2,
'r')
as f:
936 json1, json2 = load_results()
938 json1, json2, utest=
True)
942 [
'whocares',
'-0.5000',
'+0.5000',
'0',
'0',
'0',
'0']
946 utest=
True, utest_alpha=0.05, use_color=
False)
947 output_lines = output_lines_with_header[2:]
949 print(
"\n".join(output_lines_with_header))
950 self.assertEqual(
len(output_lines),
len(expect_lines))
951 for i
in range(0,
len(output_lines)):
952 parts = [x
for x
in output_lines[i].
split(
' ')
if x]
953 self.assertEqual(expect_lines[i], parts)
963 'real_time_other': 0.005,
965 'cpu_time_other': 0.15}
972 for out, expected
in zip(
974 self.assertEqual(out[
'name'], expected[
'name'])
975 self.assertEqual(out[
'time_unit'], expected[
'time_unit'])
985 testInputs = os.path.join(
987 os.path.realpath(__file__)),
989 testOutput = os.path.join(testInputs,
'test4_run.json')
990 with open(testOutput,
'r')
as f:
1000 "99 family 0 instance 0 repetition 0",
1001 "98 family 0 instance 0 repetition 1",
1002 "97 family 0 instance 0 aggregate",
1003 "96 family 0 instance 1 repetition 0",
1004 "95 family 0 instance 1 repetition 1",
1005 "94 family 0 instance 1 aggregate",
1006 "93 family 1 instance 0 repetition 0",
1007 "92 family 1 instance 0 repetition 1",
1008 "91 family 1 instance 0 aggregate",
1009 "90 family 1 instance 1 repetition 0",
1010 "89 family 1 instance 1 repetition 1",
1011 "88 family 1 instance 1 aggregate"
1015 random.shuffle(self.
json[
'benchmarks'])
1016 sorted_benchmarks = util.sort_benchmark_results(self.
json)[
1018 self.assertEqual(
len(expected_names),
len(sorted_benchmarks))
1019 for out, expected
in zip(sorted_benchmarks, expected_names):
1020 self.assertEqual(out[
'name'], expected)
1025 unittest_instance.assertAlmostEqual(
1026 lhs[
'utest'][
'cpu_pvalue'],
1027 rhs[
'utest'][
'cpu_pvalue'])
1028 unittest_instance.assertAlmostEqual(
1029 lhs[
'utest'][
'time_pvalue'],
1030 rhs[
'utest'][
'time_pvalue'])
1031 unittest_instance.assertEqual(
1032 lhs[
'utest'][
'have_optimal_repetitions'],
1033 rhs[
'utest'][
'have_optimal_repetitions'])
1036 unittest_instance.assertEqual(lhs[
'utest'], rhs[
'utest'])
1040 for m1, m2
in zip(lhs[
'measurements'], rhs[
'measurements']):
1041 unittest_instance.assertEqual(m1[
'real_time'], m2[
'real_time'])
1042 unittest_instance.assertEqual(m1[
'cpu_time'], m2[
'cpu_time'])
1045 unittest_instance.assertAlmostEqual(m1[
'time'], m2[
'time'], places=4)
1046 unittest_instance.assertAlmostEqual(m1[
'cpu'], m2[
'cpu'], places=4)
1049 if __name__ ==
'__main__':