statistics.cc
Go to the documentation of this file.
1 // Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2 // Copyright 2017 Roman Lebedev. All rights reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include "benchmark/benchmark.h"
17 
18 #include <algorithm>
19 #include <cmath>
20 #include <numeric>
21 #include <string>
22 #include <vector>
23 #include "check.h"
24 #include "statistics.h"
25 
26 namespace benchmark {
27 
// Adds up every sample in `v`, left to right, starting from 0.0.
// An empty vector therefore sums to 0.0.
auto StatisticsSum = [](const std::vector<double>& v) {
  double total = 0.0;
  for (const double sample : v) total += sample;
  return total;
};
31 
// Returns the arithmetic mean of the samples in `v`, or 0.0 when `v` is empty.
double StatisticsMean(const std::vector<double>& v) {
  if (v.empty()) return 0.0;

  const double sum = std::accumulate(v.begin(), v.end(), 0.0);
  // Divide the sum directly rather than multiplying by a precomputed
  // reciprocal: `sum * (1.0 / n)` rounds twice, so the mean of n identical
  // samples is not always that sample (49 samples of 1.0 would average to
  // 0.9999999999999999).
  return sum / static_cast<double>(v.size());
}
36 
// Returns the median of the samples in `v`, or 0.0 when `v` is empty.
//
// For an odd number of samples this is the middle order statistic; for an even
// number it is the average of the two middle order statistics. `v` is left
// untouched: the selection works on a private copy.
double StatisticsMedian(const std::vector<double>& v) {
  if (v.empty()) return 0.0;

  std::vector<double> copy(v);
  const auto center = copy.begin() + copy.size() / 2;
  // After this call `*center` is the element a full sort would put there,
  // nothing before it is greater, and nothing after it is smaller.
  std::nth_element(copy.begin(), center, copy.end());

  // Did we have an odd number of samples? If yes, then center is the median.
  if (copy.size() % 2 == 1) return *center;

  // If no, we also need the order statistic just below `center`. Everything in
  // [begin, center) is <= *center but otherwise unordered, so that value is
  // simply the largest element of the prefix. Running nth_element a second
  // time over the whole range would be allowed to shuffle the elements at and
  // after `center`, invalidating `*center`.
  const auto lower = std::max_element(copy.begin(), center);
  return (*center + *lower) / 2.0;
}
53 
// Computes the sum of x * x over every sample x in `v`, left to right,
// starting from 0.0 (so an empty vector yields 0.0).
auto SumSquares = [](const std::vector<double>& v) {
  double total = 0.0;
  for (const double sample : v) total += sample * sample;
  return total;
};
58 
// Squares a single value.
auto Sqr = [](const double value) -> double { return value * value; };
// Square root that maps any negative input to 0.0 instead of NaN. A quantity
// that is mathematically non-negative can come out slightly below zero once
// rounding error is involved; clamping keeps such results usable.
auto Sqrt = [](const double value) -> double {
  return (value < 0.0) ? 0.0 : std::sqrt(value);
};
65 
// Returns the sample standard deviation of `v`, i.e. the square root of the
// sum of squared deviations from the mean divided by (n - 1).
//
// Returns 0.0 when `v` holds fewer than two samples, because the sample
// standard deviation is undefined there.
double StatisticsStdDev(const std::vector<double>& v) {
  if (v.size() < 2) return 0.0;

  const double count = static_cast<double>(v.size());
  const double mean = std::accumulate(v.begin(), v.end(), 0.0) / count;

  // Two-pass form: accumulate (x - mean)^2 instead of evaluating
  // E[x^2] - mean^2. The latter subtracts two huge, nearly equal numbers when
  // the samples are large compared to their spread (e.g. ~1e9 with a spread of
  // a few units) and loses every significant digit. The sum below is built
  // from squares, so it can never go negative and needs no clamping.
  double squared_deviations = 0.0;
  for (const double sample : v) {
    const double deviation = sample - mean;
    squared_deviations += deviation * deviation;
  }
  return std::sqrt(squared_deviations / (count - 1.0));
}
76 
77 double StatisticsCV(const std::vector<double>& v) {
78  if (v.size() < 2) return 0.0;
79 
80  const auto stddev = StatisticsStdDev(v);
81  const auto mean = StatisticsMean(v);
82 
83  return stddev / mean;
84 }
85 
// Builds the aggregate rows for a set of repetitions of one benchmark.
//
// For every entry in `*reports[0].statistics` one Run is produced whose real
// and CPU times, and every user counter, are that statistic's `compute_`
// applied to the samples gathered from `reports`.
//
// Returns an empty vector when fewer than two reports are free of errors.
// Reports with `error_occurred` set contribute no samples, but they are still
// included wherever `reports.size()` is used below.
//
// NOTE(review): `Run` is used unqualified although no alias for it is visible
// in this listing, and the embedded line numbers skip 88 and 162 -- presumably
// an alias for BenchmarkReporter::Run and one more field assignment were lost
// when the listing was produced; confirm against the real file.
86 std::vector<BenchmarkReporter::Run> ComputeStats(
87  const std::vector<BenchmarkReporter::Run>& reports) {
89  std::vector<Run> results;
90 
91  auto error_count =
92  std::count_if(reports.begin(), reports.end(),
93  [](Run const& run) { return run.error_occurred; });
94 
   // An empty `reports` also takes this exit, so front() / reports[0] below
   // are only reached with at least two elements.
95  if (reports.size() - error_count < 2) {
96  // We don't report aggregated data if there was a single run.
97  return results;
98  }
99 
100  // Accumulators.
101  std::vector<double> real_accumulated_time_stat;
102  std::vector<double> cpu_accumulated_time_stat;
103 
104  real_accumulated_time_stat.reserve(reports.size());
105  cpu_accumulated_time_stat.reserve(reports.size());
106 
107  // All repetitions should be run with the same number of iterations so we
108  // can take this information from the first benchmark.
109  const IterationCount run_iterations = reports.front().iterations;
110  // create stats for user counters
    // `c` keeps the Counter first seen under a given name (its flags and oneK
    // are reused for the aggregate); `s` collects that counter's samples.
111  struct CounterStat {
112  Counter c;
113  std::vector<double> s;
114  };
115  std::map<std::string, CounterStat> counter_stats;
    // First pass: register every counter name that appears in any report,
    // errored ones included, without recording samples yet.
116  for (Run const& r : reports) {
117  for (auto const& cnt : r.counters) {
118  auto it = counter_stats.find(cnt.first);
119  if (it == counter_stats.end()) {
120  counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
121  it = counter_stats.find(cnt.first);
122  it->second.s.reserve(reports.size());
123  } else {
    // A counter name must carry the same flags in every report.
124  BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
125  }
126  }
127  }
128 
129  // Populate the accumulators.
130  for (Run const& run : reports) {
    // Both checks sit before the error skip, so they apply to errored runs too.
131  BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
132  BM_CHECK_EQ(run_iterations, run.iterations);
133  if (run.error_occurred) continue;
134  real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
135  cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
136  // user counters
137  for (auto const& cnt : run.counters) {
138  auto it = counter_stats.find(cnt.first);
139  BM_CHECK_NE(it, counter_stats.end());
    // NOTE(review): stores the counter as a double -- presumably through an
    // implicit conversion provided by Counter; confirm.
140  it->second.s.emplace_back(cnt.second);
141  }
142  }
143 
144  // Only add label if it is same for all runs
145  std::string report_label = reports[0].report_label;
146  for (std::size_t i = 1; i < reports.size(); i++) {
147  if (reports[i].report_label != report_label) {
148  report_label = "";
149  break;
150  }
151  }
152 
    // Ratio of the number of reports (errored ones included) to the
    // per-repetition iteration count; applied to time-unit statistics below.
153  const double iteration_rescale_factor =
154  double(reports.size()) / double(run_iterations);
155 
    // One output row per statistic attached to the first report.
156  for (const auto& Stat : *reports[0].statistics) {
157  // Get the data from the accumulator to BenchmarkReporter::Run's.
158  Run data;
159  data.run_name = reports[0].run_name;
160  data.family_index = reports[0].family_index;
161  data.per_family_instance_index = reports[0].per_family_instance_index;
163  data.threads = reports[0].threads;
164  data.repetitions = reports[0].repetitions;
165  data.repetition_index = Run::no_repetition_index;
166  data.aggregate_name = Stat.name_;
167  data.aggregate_unit = Stat.unit_;
168  data.report_label = report_label;
169 
170  // It is incorrect to say that an aggregate is computed over
171  // run's iterations, because those iterations already got averaged.
172  // Similarly, if there are N repetitions with 1 iterations each,
173  // an aggregate will be computed over N measurements, not 1.
174  // Thus it is best to simply use the count of separate reports.
175  data.iterations = reports.size();
176 
177  data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
178  data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
179 
180  if (data.aggregate_unit == StatisticUnit::kTime) {
181  // We will divide these times by data.iterations when reporting, but the
182  // data.iterations is not necessarily the scale of these measurements,
183  // because in each repetition, these timers are sum over all the iters.
184  // And if we want to say that the stats are over N repetitions and not
185  // M iterations, we need to multiply these by (N/M).
186  data.real_accumulated_time *= iteration_rescale_factor;
187  data.cpu_accumulated_time *= iteration_rescale_factor;
188  }
189 
190  data.time_unit = reports[0].time_unit;
191 
192  // user counters
193  for (auto const& kv : counter_stats) {
194  // Do *NOT* rescale the custom counters. They are already properly scaled.
195  const auto uc_stat = Stat.compute_(kv.second.s);
    // The aggregate counter reuses the flags and oneK of the first-seen Counter.
196  auto c = Counter(uc_stat, counter_stats[kv.first].c.flags,
197  counter_stats[kv.first].c.oneK);
198  data.counters[kv.first] = c;
199  }
200 
201  results.push_back(data);
202  }
203 
204  return results;
205 }
206 
207 } // end namespace benchmark
benchmark::kTime
@ kTime
Definition: benchmark/include/benchmark/benchmark.h:453
benchmark::BenchmarkReporter::Run::RT_Aggregate
@ RT_Aggregate
Definition: benchmark/include/benchmark/benchmark.h:1425
regen-readme.it
it
Definition: regen-readme.py:15
benchmark::Sqr
auto Sqr
Definition: statistics.cc:59
check.h
benchmark
Definition: bm_alarm.cc:55
benchmark::StatisticsSum
auto StatisticsSum
Definition: statistics.cc:28
copy
static int copy(grpc_slice_buffer *input, grpc_slice_buffer *output)
Definition: message_compress.cc:145
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
statistics.h
benchmark::StatisticsStdDev
double StatisticsStdDev(const std::vector< double > &v)
Definition: statistics.cc:66
benchmark::IterationCount
uint64_t IterationCount
Definition: benchmark/include/benchmark/benchmark.h:451
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
benchmarks.python.py_benchmark.results
list results
Definition: bloaty/third_party/protobuf/benchmarks/python/py_benchmark.py:145
BM_CHECK_NE
#define BM_CHECK_NE(a, b)
Definition: benchmark/src/check.h:69
testing::internal::posix::Stat
int Stat(const char *path, StatStruct *buf)
Definition: bloaty/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:2008
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
benchmark::Counter
Definition: benchmark/include/benchmark/benchmark.h:382
benchmark::BenchmarkReporter::Run
Definition: benchmark/include/benchmark/benchmark.h:1423
BM_CHECK_EQ
#define BM_CHECK_EQ(a, b)
Definition: benchmark/src/check.h:68
client.run
def run()
Definition: examples/python/async_streaming/client.py:109
benchmark::StatisticsCV
double StatisticsCV(const std::vector< double > &v)
Definition: statistics.cc:77
benchmark::ComputeStats
std::vector< BenchmarkReporter::Run > ComputeStats(const std::vector< BenchmarkReporter::Run > &reports)
Definition: statistics.cc:86
fix_build_deps.r
r
Definition: fix_build_deps.py:491
accumulate
static void accumulate(upb_pb_encoder *e)
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/upb.c:7694
benchmark::SumSquares
auto SumSquares
Definition: statistics.cc:55
benchmark::StatisticsMean
double StatisticsMean(const std::vector< double > &v)
Definition: statistics.cc:32
googletest-break-on-failure-unittest.Run
def Run(command)
Definition: bloaty/third_party/googletest/googletest/test/googletest-break-on-failure-unittest.py:76
benchmark::Sqrt
auto Sqrt
Definition: statistics.cc:60
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
benchmark::StatisticsMedian
double StatisticsMedian(const std::vector< double > &v)
Definition: statistics.cc:37


grpc
Author(s):
autogenerated on Thu Mar 13 2025 03:01:22