bm_diff.py
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Computes the diff between two bm runs and outputs significant results."""

import argparse
import collections
import json
import os
import subprocess
import sys

sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..'))

import bm_constants
import bm_json
import bm_speedup
import tabulate

verbose = False


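# Median of a non-empty sample list; for an even number of samples this is the
# mean of the two middle values, e.g. _median([1, 3, 5, 7]) == 4.0.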
def _median(ary):
    assert len(ary)
    ary = sorted(ary)
    n = len(ary)
    if n % 2 == 0:
        return (ary[(n - 1) // 2] + ary[(n - 1) // 2 + 1]) / 2.0
    else:
        return ary[n // 2]


def _args():
    argp = argparse.ArgumentParser(
        description='Perform diff on microbenchmarks')
    argp.add_argument('-t',
                      '--track',
                      choices=sorted(bm_constants._INTERESTING),
                      nargs='+',
                      default=sorted(bm_constants._INTERESTING),
                      help='Which metrics to track')
    argp.add_argument('-b',
                      '--benchmarks',
                      nargs='+',
                      choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
                      default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
                      help='Which benchmarks to run')
    argp.add_argument(
        '-l',
        '--loops',
        type=int,
        default=20,
        help=
        'Number of times to loop the benchmarks. Must match what was passed to bm_run.py'
    )
    argp.add_argument('-r',
                      '--regex',
                      type=str,
                      default="",
                      help='Regex to filter benchmarks run')
    argp.add_argument('--counters', dest='counters', action='store_true')
    argp.add_argument('--no-counters', dest='counters', action='store_false')
    argp.set_defaults(counters=True)
    argp.add_argument('-n', '--new', type=str, help='New benchmark name')
    argp.add_argument('-o', '--old', type=str, help='Old benchmark name')
    # Note: with type=bool, any non-empty value passed to -v enables verbose output.
    argp.add_argument('-v',
                      '--verbose',
                      type=bool,
                      help='Print details of before/after')
    args = argp.parse_args()
    global verbose
    if args.verbose:
        verbose = True
    assert args.new
    assert args.old
    return args


def _maybe_print(str):
    if verbose:
        print(str)


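# Accumulates per-metric samples for a single benchmark across the 'new' (True)
# and 'old' (False) runs; process() keeps only metrics whose speedup and median
# difference clear the reporting thresholds.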
class Benchmark:

    def __init__(self):
        self.samples = {
            True: collections.defaultdict(list),
            False: collections.defaultdict(list)
        }
        self.final = {}
        self.speedup = {}

    def add_sample(self, track, data, new):
        for f in track:
            if f in data:
                self.samples[new][f].append(float(data[f]))

    def process(self, track, new_name, old_name):
        for f in sorted(track):
            new = self.samples[True][f]
            old = self.samples[False][f]
            if not new or not old:
                continue
            mdn_diff = abs(_median(new) - _median(old))
            _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' %
                         (f, new_name, new, old_name, old, mdn_diff))
            s = bm_speedup.speedup(new, old, 1e-5)
            self.speedup[f] = s
            if abs(s) > 3:
                if mdn_diff > 0.5:
                    self.final[f] = '%+d%%' % s
        return self.final.keys()

    def skip(self):
        return not self.final

    def row(self, flds):
        return [self.final[f] if f in self.final else '' for f in flds]

    def speedup(self, name):
        # Note: this accessor is shadowed by the self.speedup dict assigned in
        # __init__, so callers read the dict directly.
        if name in self.speedup:
            return self.speedup[name]
        return None


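# Reads a single per-loop result file; failures are tallied in the provided
# dicts, keyed by the file name with the trailing "<loop>.json" stripped, so
# repeated failures for the same benchmark/config are counted together.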
def _read_json(filename, badjson_files, nonexistant_files):
    stripped = ".".join(filename.split(".")[:-2])
    try:
        with open(filename) as f:
            r = f.read()
            return json.loads(r)
    except IOError as e:
        if stripped in nonexistant_files:
            nonexistant_files[stripped] += 1
        else:
            nonexistant_files[stripped] = 1
        return None
    except ValueError as e:
        print(r)
        if stripped in badjson_files:
            badjson_files[stripped] += 1
        else:
            badjson_files[stripped] = 1
        return None


def fmt_dict(d):
    return ''.join([" " + k + ": " + str(d[k]) + "\n" for k in d])


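# Collects the per-loop JSON samples for every test in every benchmark binary,
# computes per-metric speedups between the 'new' and 'old' runs, and returns a
# (table, note, significance) tuple for the caller.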
def diff(bms, loops, regex, track, old, new, counters):
    benchmarks = collections.defaultdict(Benchmark)

    badjson_files = {}
    nonexistant_files = {}
    for bm in bms:
        for loop in range(0, loops):
            for line in subprocess.check_output([
                    'bm_diff_%s/opt/%s' % (old, bm), '--benchmark_list_tests',
                    '--benchmark_filter=%s' % regex
            ]).splitlines():
                line = line.decode('UTF-8')
                stripped_line = line.strip().replace("/", "_").replace(
                    "<", "_").replace(">", "_").replace(", ", "_")
                js_new_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop),
                    badjson_files, nonexistant_files)
                js_old_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop),
                    badjson_files, nonexistant_files)
                if counters:
                    js_new_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, new, loop), badjson_files,
                        nonexistant_files)
                    js_old_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, old, loop), badjson_files,
                        nonexistant_files)
                else:
                    js_new_ctr = None
                    js_old_ctr = None

                for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, True)
                for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, False)

    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    fields = [f for f in track if f in really_interesting]

    # figure out the significance of the changes... right now we take the 95%-ile
    # benchmark delta %-age, and then apply some hand-chosen thresholds
    histogram = []
    _NOISY = ["BM_WellFlushed"]
    for name, bm in benchmarks.items():
        if name in _NOISY:
            print("skipping noisy benchmark '%s' for labelling evaluation" %
                  name)
            continue
        if bm.skip():
            continue
        # Use .get() so benchmarks without a recorded cpu_time speedup are
        # skipped instead of raising KeyError.
        d = bm.speedup.get('cpu_time')
        if d is None:
            continue
        histogram.append(d)
    histogram.sort()
    print("histogram of speedups: ", histogram)
    if len(histogram) == 0:
        significance = 0
    else:
        delta = histogram[int(len(histogram) * 0.95)]
        mul = 1
        if delta < 0:
            delta = -delta
            mul = -1
        if delta < 2:
            significance = 0
        elif delta < 5:
            significance = 1
        elif delta < 10:
            significance = 2
        else:
            significance = 3
        significance *= mul
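    # For example, a 95th-percentile cpu_time delta of -7% gives delta = 7 and
    # mul = -1, so the run is labelled with significance -2.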

    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = None
    if len(badjson_files):
        note = 'Corrupt JSON data (indicates timeout or crash): \n%s' % fmt_dict(
            badjson_files)
    if len(nonexistant_files):
        if note:
            note += '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
        else:
            note = '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
    if rows:
        return tabulate.tabulate(rows, headers=headers,
                                 floatfmt='+.2f'), note, significance
    else:
        return None, note, 0


if __name__ == '__main__':
    args = _args()
    # diff() returns (table, note, significance); only the table and note are
    # printed here.
    diff, note, _ = diff(args.benchmarks, args.loops, args.regex, args.track,
                         args.old, args.new, args.counters)
    print('%s\n%s' % (note, diff if diff else "No performance differences"))
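
A minimal sketch of driving this module programmatically rather than via the command line. The benchmark name, run labels, and tracked metric below are illustrative placeholders; the per-loop JSON files and the bm_diff_<old>/opt and bm_diff_<new>/opt binaries produced ahead of time (as with bm_run.py) are assumed to already exist in the working directory.

import bm_diff

# Placeholder benchmark selection and run labels; 'old'/'new' must match the
# names used when the per-loop JSON files were generated.
table, note, significance = bm_diff.diff(bms=['bm_cq'],
                                         loops=20,
                                         regex='',
                                         track=['cpu_time'],
                                         old='old',
                                         new='new',
                                         counters=False)
print(table if table else 'No performance differences')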