Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef BENCHPRESS_HPP
00023 #define BENCHPRESS_HPP
00024
00025 #include <algorithm>
00026 #include <atomic>
00027 #include <chrono>
00028 #include <functional>
00029 #include <iomanip>
00030 #include <iostream>
00031 #include <regex>
00032 #include <sstream>
00033 #include <string>
00034 #include <thread>
00035 #include <vector>
00036
00037 namespace benchpress {
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
// Run-time configuration for a benchmark session.
//
// Every setter returns *this so calls can be chained:
//     options().bench("sort.*").benchtime(2).cpu(4)
class options {
    std::string d_bench;      // regular expression matched against benchmark names
    size_t      d_benchtime;  // target run time per benchmark, in seconds
    size_t      d_cpu;        // number of threads used by parallel benchmarks

    // Default thread count. std::thread::hardware_concurrency() is allowed to
    // return 0 when the value cannot be determined; fall back to one thread so
    // that parallel benchmarks still execute their body.
    static size_t default_cpu() {
        const unsigned int detected = std::thread::hardware_concurrency();
        return detected == 0 ? 1 : detected;
    }

public:
    // Defaults: match every benchmark, run each for one second, and use one
    // thread per detected hardware thread (at least one).
    options()
        : d_bench(".*")
        , d_benchtime(1)
        , d_cpu(default_cpu())
    {}

    // Sets the regular expression used to select benchmarks by name.
    options& bench(const std::string& bench) {
        d_bench = bench;
        return *this;
    }

    // Sets the target run time per benchmark, in seconds.
    options& benchtime(size_t benchtime) {
        d_benchtime = benchtime;
        return *this;
    }

    // Sets the number of threads for parallel benchmarks. The value is stored
    // exactly as given.
    options& cpu(size_t cpu) {
        d_cpu = cpu;
        return *this;
    }

    // Returns the benchmark name pattern.
    std::string get_bench() const {
        return d_bench;
    }

    // Returns the target run time per benchmark, in seconds.
    size_t get_benchtime() const {
        return d_benchtime;
    }

    // Returns the number of threads for parallel benchmarks.
    size_t get_cpu() const {
        return d_cpu;
    }
};
00084
// Declared here because benchmark functions receive a context*; the class
// itself is defined further down in this header.
class context;

// A registered benchmark: the name it is reported under together with the
// callable that implements it.
class benchmark_info {
    std::string                   d_name;
    std::function<void(context*)> d_func;

public:
    // Stores copies of the given name and callable.
    benchmark_info(std::string name, std::function<void(context*)> func)
        : d_name(name), d_func(func)
    {}

    // Returns a copy of the benchmark's name.
    std::string get_name() const {
        return d_name;
    }

    // Returns a copy of the benchmark's callable.
    std::function<void(context*)> get_func() const {
        return d_func;
    }
};
00107
00108
00109
00110
00111
00112
00113
00114 class registration {
00115 static registration* d_this;
00116 std::vector<benchmark_info> d_benchmarks;
00117
00118 public:
00119 static registration* get_ptr() {
00120 if (nullptr == d_this) {
00121 d_this = new registration();
00122 }
00123 return d_this;
00124 }
00125
00126 void register_benchmark(benchmark_info& info) {
00127 d_benchmarks.push_back(info);
00128 }
00129
00130 std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; }
00131 };
00132
00133
00134
00135
00136 class auto_register {
00137 public:
00138 auto_register(const std::string& name, std::function<void(context*)> func) {
00139 benchmark_info info(name, func);
00140 registration::get_ptr()->register_benchmark(info);
00141 }
00142 };
00143
// Token-pasting helpers. CONCAT2 adds a level of indirection so that macro
// arguments such as __LINE__ are expanded before they are pasted.
#define CONCAT(x, y) x ## y
#define CONCAT2(x, y) CONCAT(x, y)

// Registers the callable `f` as a benchmark named `x` by defining a
// benchpress::auto_register object named register_<line>.
// The object is `static` (internal linkage) so that two translation units with
// a BENCHMARK on the same line number do not define the same external symbol.
// The expansion already ends in a semicolon.
#define BENCHMARK(x, f) static benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));

// Expands to a block containing an empty inline-assembly statement. Judging by
// its name, it is meant to be placed in benchmark bodies to discourage the
// optimizer from discarding them.
#define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }
00153
00154
00155
00156
// Outcome of one benchmark: how many iterations were executed, how long they
// took in total, and how many bytes each iteration processed (0 if unset).
class result {
    size_t                   d_num_iterations;
    std::chrono::nanoseconds d_duration;
    size_t                   d_num_bytes;

public:
    result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
        : d_num_iterations(num_iterations)
        , d_duration(duration)
        , d_num_bytes(num_bytes)
    {}

    // Average time per iteration in whole nanoseconds (integer division);
    // 0 when no iterations were run.
    size_t get_ns_per_op() const {
        if (d_num_iterations == 0) {
            return 0;
        }
        return d_duration.count() / d_num_iterations;
    }

    // Throughput in megabytes (10^6 bytes) per second; 0 when there were no
    // iterations, no elapsed time, or no byte count.
    double get_mb_per_s() const {
        if (d_num_iterations == 0 || d_duration.count() <= 0 || d_num_bytes == 0) {
            return 0;
        }
        // Use fractional seconds. Casting to std::chrono::seconds truncates,
        // which yields a zero divisor for runs shorter than one second and an
        // overstated rate for e.g. 1.9 s.
        const double seconds   = std::chrono::duration<double>(d_duration).count();
        const double megabytes = static_cast<double>(d_num_bytes)
                               * static_cast<double>(d_num_iterations) / 1e6;
        return megabytes / seconds;
    }

    // Formats the result as right-aligned, 12-character-wide columns:
    // iteration count, ns/op, and MB/s (the last only when it is positive).
    std::string to_string() const {
        std::stringstream tmp;
        tmp << std::setw(12) << std::right << d_num_iterations;
        const size_t npo = get_ns_per_op();
        tmp << std::setw(12) << std::right << npo << std::setw(0) << " ns/op";
        const double mbs = get_mb_per_s();
        if (mbs > 0.0) {
            tmp << std::setw(12) << std::right << mbs << std::setw(0) << " MB/s";
        }
        return tmp.str();
    }
};
00196
00197
00198
00199
// Iteration budget shared by the worker threads of a parallel benchmark.
// Each worker calls next() before every iteration and stops once it returns
// false.
class parallel_context {
    std::atomic_intmax_t d_num_iterations;

public:
    // Starts with `num_iterations` iterations available.
    parallel_context(size_t num_iterations)
        : d_num_iterations(num_iterations)
    {}

    // Atomically takes one iteration from the budget. Returns true when the
    // budget was still positive before this call. The counter is decremented
    // on every call, including calls made after the budget is used up.
    bool next() {
        const auto before = d_num_iterations--;
        return before > 0;
    }
};
00211
00212
00213
00214
00215 class context {
00216 bool d_timer_on;
00217 std::chrono::high_resolution_clock::time_point d_start;
00218 std::chrono::nanoseconds d_duration;
00219 std::chrono::seconds d_benchtime;
00220 size_t d_num_iterations;
00221 size_t d_num_threads;
00222 size_t d_num_bytes;
00223 benchmark_info d_benchmark;
00224
00225 public:
00226 context(const benchmark_info& info, const options& opts)
00227 : d_timer_on(false)
00228 , d_start()
00229 , d_duration()
00230 , d_benchtime(std::chrono::seconds(opts.get_benchtime()))
00231 , d_num_iterations(1)
00232 , d_num_threads(opts.get_cpu())
00233 , d_num_bytes(0)
00234 , d_benchmark(info)
00235 {}
00236
00237 size_t num_iterations() const { return d_num_iterations; }
00238
00239 void set_num_threads(size_t n) { d_num_threads = n; }
00240 size_t num_threads() const { return d_num_threads; }
00241
00242 void start_timer() {
00243 if (!d_timer_on) {
00244 d_start = std::chrono::high_resolution_clock::now();
00245 d_timer_on = true;
00246 }
00247 }
00248 void stop_timer() {
00249 if (d_timer_on) {
00250 d_duration += std::chrono::high_resolution_clock::now() - d_start;
00251 d_timer_on = false;
00252 }
00253 }
00254 void reset_timer() {
00255 if (d_timer_on) {
00256 d_start = std::chrono::high_resolution_clock::now();
00257 }
00258 d_duration = std::chrono::nanoseconds::zero();
00259 }
00260
00261 void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
00262
00263 size_t get_ns_per_op() {
00264 if (d_num_iterations <= 0) {
00265 return 0;
00266 }
00267 return d_duration.count() / d_num_iterations;
00268 }
00269
00270 void run_n(size_t n) {
00271 d_num_iterations = n;
00272 reset_timer();
00273 start_timer();
00274 d_benchmark.get_func()(this);
00275 stop_timer();
00276 }
00277
00278 void run_parallel(std::function<void(parallel_context*)> f) {
00279 parallel_context pc(d_num_iterations);
00280 std::vector<std::thread> threads;
00281 for (size_t i = 0; i < d_num_threads; ++i) {
00282 threads.push_back(std::thread([&pc,&f]() -> void {
00283 f(&pc);
00284 }));
00285 }
00286 for(auto& thread : threads){
00287 thread.join();
00288 }
00289 }
00290
00291 result run() {
00292 size_t n = 1;
00293 run_n(n);
00294 while (d_duration < d_benchtime && n < 1e9) {
00295 size_t last = n;
00296 if (get_ns_per_op() == 0) {
00297 n = 1e9;
00298 } else {
00299 n = d_duration.count() / get_ns_per_op();
00300 }
00301 n = std::max(std::min(n+n/2, 100*last), last+1);
00302 n = round_up(n);
00303 run_n(n);
00304 }
00305 return result(n, d_duration, d_num_bytes);
00306 }
00307
00308 private:
00309 template<typename T>
00310 T round_down_10(T n) {
00311 int tens = 0;
00312 while (n > 10) {
00313 n /= 10;
00314 tens++;
00315 }
00316 int result = 1;
00317 for (int i = 0; i < tens; ++i) {
00318 result *= 10;
00319 }
00320 return result;
00321 }
00322
00323 template<typename T>
00324 T round_up(T n) {
00325 T base = round_down_10(n);
00326 if (n < (2 * base)) {
00327 return 2 * base;
00328 }
00329 if (n < (5 * base)) {
00330 return 5 * base;
00331 }
00332 return 10 * base;
00333 }
00334 };
00335
00336
00337
00338
00339 void run_benchmarks(const options& opts) {
00340 std::regex match_r(opts.get_bench());
00341 auto benchmarks = registration::get_ptr()->get_benchmarks();
00342 for (auto& info : benchmarks) {
00343 if (std::regex_match(info.get_name(), match_r)) {
00344 context c(info, opts);
00345 auto r = c.run();
00346 std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
00347 }
00348 }
00349 }
00350
00351 }
00352
00353
00354
00355
00356
00357 #ifdef BENCHPRESS_CONFIG_MAIN
00358 #include "cxxopts.hpp"
00359 benchpress::registration* benchpress::registration::d_this;
00360 int main(int argc, char** argv) {
00361 std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
00362 benchpress::options bench_opts;
00363 try {
00364 cxxopts::Options cmd_opts(argv[0], " - command line options");
00365 cmd_opts.add_options()
00366 ("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
00367 ->default_value(".*"))
00368 ("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
00369 ->default_value("1"))
00370 ("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
00371 ->default_value(std::to_string(std::thread::hardware_concurrency())))
00372 ("help", "print help")
00373 ;
00374 cmd_opts.parse(argc, argv);
00375 if (cmd_opts.count("help")) {
00376 std::cout << cmd_opts.help({""}) << std::endl;
00377 exit(0);
00378 }
00379 if (cmd_opts.count("bench")) {
00380 bench_opts.bench(cmd_opts["bench"].as<std::string>());
00381 }
00382 if (cmd_opts.count("benchtime")) {
00383 bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
00384 }
00385 if (cmd_opts.count("cpu")) {
00386 bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
00387 }
00388 } catch (const cxxopts::OptionException& e) {
00389 std::cout << "error parsing options: " << e.what() << std::endl;
00390 exit(1);
00391 }
00392 benchpress::run_benchmarks(bench_opts);
00393 float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
00394 std::chrono::high_resolution_clock::now() - bp_start
00395 ).count() / 1000.f;
00396 std::cout << argv[0] << " " << duration << "s" << std::endl;
00397 return 0;
00398 }
00399 #endif
00400
00401 #endif // BENCHPRESS_HPP