benchmark/tools/gbench/report.py
1 """report.py - Utilities for reporting statistics about benchmark results
2 """
3 
4 import unittest
5 import os
6 import re
7 import copy
8 import random
9 
10 from scipy.stats import mannwhitneyu
11 
12 
13 class BenchmarkColor(object):
14  def __init__(self, name, code):
15  self.name = name
16  self.code = code
17 
18  def __repr__(self):
19  return '%s%r' % (self.__class__.__name__,
20  (self.name, self.code))
21 
22  def __format__(self, format):
23  return self.code
24 
25 
26 # Benchmark Colors Enumeration
27 BC_NONE = BenchmarkColor('NONE', '')
28 BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
29 BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
30 BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
31 BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
32 BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
33 BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
34 BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
35 BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
36 BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
37 BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
38 BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
39 
40 UTEST_MIN_REPETITIONS = 2
41 UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better.
42 UTEST_COL_NAME = "_pvalue"
43 
44 
def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)
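
# Illustrative sketch (comments only, not part of the module's logic): with
# use_color=False every BenchmarkColor argument collapses to BC_NONE, whose
# code is the empty string, so the ANSI escapes vanish from the output:
#   color_format(False, "{}{:d}{endc}", BC_FAIL, 42, endc=BC_ENDC)  # -> "42"
#   color_format(True,  "{}{:d}{endc}", BC_FAIL, 42, endc=BC_ENDC)
#   # -> "\033[91m42\033[0m"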


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc['name']) > longest_name:
            longest_name = len(bc['name'])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
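
# Worked examples of the formula above (comments only, nothing executed):
#   calculate_change(100, 110) ==  0.10   # 10% slower
#   calculate_change(50, 25)   == -0.50   # twice as fast
#   calculate_change(0, 10)    ==  2.00   # old == 0: change relative to the
#                                         # midpoint of the two values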


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered['benchmarks'] = []
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered
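
# Illustrative, mirroring TestReportDifferenceBetweenFamilies below: given a
# run containing the families 'BM_Zero'/'BM_Zero/4' and 'BM_One'/'BM_One/4',
#   json1 = filter_benchmark(json, "BM_Z.ro", ".")
#   json2 = filter_benchmark(json, "BM_O.e", ".")
# keeps one family per result and rewrites the matching part of each name to
# '.', so both families share names ('.', './4', ...) and can be diffed.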


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    uniqued = [x['name'] for x in json['benchmarks']
               if x['name'] not in seen and
               (seen.add(x['name']) or True)]
    return uniqued


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1['benchmarks']:
            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
                time_unit = x['time_unit']
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        partitions.append([lhs, rhs])
    return partitions
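
# Shape of the result, as a sketch: for a name 'BM_abc' repeated twice in each
# input, partitions contains one [lhs, rhs] pair such as
#   [[{'name': 'BM_abc', ...rep 0...}, {'name': 'BM_abc', ...rep 1...}],
#    [{'name': 'BM_abc', ...rep 0...}, {'name': 'BM_abc', ...rep 1...}]]
# i.e. all repetitions of the same benchmark, grouped per input run.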


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
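
# Contract sketch: calc_utest returns a triple
#   (have_optimal_repetitions, cpu_pvalue, time_pvalue)
# With only 2 repetitions per side the p-values are still computed, but the
# first element is False, which print_utest below turns into a reliability
# warning; below UTEST_MIN_REPETITIONS it returns (False, None, None).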


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None:
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]['name']
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'], other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
            if cpu_pvalue and time_pvalue:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else ''
            aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else ''
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    return diff_report
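
# Minimal usage sketch (hypothetical file names; the compare.py tool one
# directory up is the real driver of this module):
#   import json
#   with open('run_baseline.json') as f:
#       json1 = json.load(f)
#   with open('run_contender.json') as f:
#       json2 = json.load(f)
#   diff_report = get_difference_report(json1, json2, utest=True)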


def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Print the difference report between two benchmark runs, as previously
    computed by 'get_difference_report'.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if not include_aggregates_only or 'run_type' not in benchmark or benchmark['run_type'] == 'aggregate':
            for measurement in benchmark['measurements']:
                output_strs += [color_format(use_color,
                                             fmt_str,
                                             BC_HEADER,
                                             benchmark['name'],
                                             first_col_width,
                                             get_color(measurement['time']),
                                             measurement['time'],
                                             get_color(measurement['cpu']),
                                             measurement['cpu'],
                                             measurement['real_time'],
                                             measurement['real_time_other'],
                                             measurement['cpu_time'],
                                             measurement['cpu_time_other'],
                                             endc=BC_ENDC)]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for given benchmark),
        # print the U test.
        if utest and benchmark['utest']:
            output_strs += print_utest(benchmark['name'],
                                       benchmark['utest'],
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs
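
# Continuing the sketch above: the diff report renders to pre-aligned text,
# one line per measurement (plus U test rows when requested):
#   for line in print_difference_report(diff_report, utest=True,
#                                       use_color=False):
#       print(line)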



# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
             '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
             '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
             '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                     }
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test4_run0.json')
            testOutput2 = os.path.join(testInputs, 'test4_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'whocares',
                'measurements': [
                    {'time': -0.5,
                     'cpu': 0.5,
                     'real_time': 0.01,
                     'real_time_other': 0.005,
                     'cpu_time': 0.10,
                     'cpu_time_other': 0.15}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test4_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        for n in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty. assert if rhs is not.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;