benchpress.hpp
Go to the documentation of this file.
00001 /*
00002 * Copyright (C) 2015 Christopher Gilbert.
00003 *
00004 * Permission is hereby granted, free of charge, to any person obtaining a copy
00005 * of this software and associated documentation files (the "Software"), to deal
00006 * in the Software without restriction, including without limitation the rights
00007 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00008 * copies of the Software, and to permit persons to whom the Software is
00009 * furnished to do so, subject to the following conditions:
00010 *
00011 * The above copyright notice and this permission notice shall be included in all
00012 * copies or substantial portions of the Software.
00013 *
00014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00017 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00019 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
00020 * SOFTWARE.
00021 */
00022 #ifndef BENCHPRESS_HPP
00023 #define BENCHPRESS_HPP
00024 
00025 #include <algorithm>   // max, min
00026 #include <atomic>      // atomic_intmax_t
00027 #include <chrono>      // high_resolution_timer, duration
00028 #include <functional>  // function
00029 #include <iomanip>     // setw
00030 #include <iostream>    // cout
00031 #include <regex>       // regex, regex_match
00032 #include <sstream>     // stringstream
00033 #include <string>      // string
00034 #include <thread>      // thread
00035 #include <vector>      // vector
00036 
00037 namespace benchpress {
00038 
/*
 * The options class encapsulates all options for running benchmarks.
 *
 * When benchpress is included with BENCHPRESS_CONFIG_MAIN defined, a main function is emitted which builds an
 * options object from the command line. A developer who writes their own main stub constructs the options object
 * manually; every setter returns *this so that calls can be chained:
 *
 * options opts;
 * opts
 *     .bench(".*")
 *     .benchtime(1)
 *     .cpu(4);
 *
 * Defaults: every benchmark is selected (".*"), the benchtime is 1 second and the thread count is the hardware
 * concurrency reported by the standard library, or 1 when that value is not available.
 */
class options {
    std::string d_bench;      // regular expression matched against benchmark names
    size_t      d_benchtime;  // target run time per benchmark, in seconds
    size_t      d_cpu;        // number of threads used by parallel benchmarks

    // std::thread::hardware_concurrency() is allowed to return 0 when the value cannot be determined. A thread
    // count of 0 would make parallel benchmarks run no work at all, so fall back to a single thread.
    static size_t default_cpu() {
        const unsigned detected = std::thread::hardware_concurrency();
        return detected == 0 ? 1 : detected;
    }
public:
    options()
        : d_bench(".*")
        , d_benchtime(1)
        , d_cpu(default_cpu())
    {}
    // Sets the regular expression used to select benchmarks by name.
    options& bench(const std::string& bench) {
        d_bench = bench;
        return *this;
    }
    // Sets the target run time of each benchmark, in seconds.
    options& benchtime(size_t benchtime) {
        d_benchtime = benchtime;
        return *this;
    }
    // Sets the number of threads used by parallel benchmarks. The value is stored as given.
    options& cpu(size_t cpu) {
        d_cpu = cpu;
        return *this;
    }
    std::string get_bench() const {
        return d_bench;
    }
    size_t get_benchtime() const {
        return d_benchtime;
    }
    size_t get_cpu() const {
        return d_cpu;
    }
};
00084 
// Forward declaration: benchmark functions receive a pointer to the context that is running them.
class context;

/*
 * benchmark_info pairs the name of a benchmark with the callable that implements it.
 *
 * benchmark_info bi("example", [](benchpress::context* b) {
 *     // benchmark function
 * });
 *
 * Both accessors hand back copies of the stored values.
 */
class benchmark_info {
public:
    benchmark_info(std::string name, std::function<void(context*)> func)
        : d_name(name), d_func(func) {}

    // Name given at construction.
    std::string get_name() const {
        return d_name;
    }

    // Callable given at construction.
    std::function<void(context*)> get_func() const {
        return d_func;
    }

private:
    std::string                   d_name;
    std::function<void(context*)> d_func;
};
00107 
00108 /*
00109  * The registration class is responsible for providing a single global point of reference for registering
00110  * benchmark functions.
00111  *
00112  * registration::get_ptr()->register_benchmark(info);
00113  */
00114 class registration {
00115     static registration*        d_this;
00116     std::vector<benchmark_info> d_benchmarks;
00117 
00118 public:
00119     static registration* get_ptr() {
00120         if (nullptr == d_this) {
00121             d_this = new registration();
00122         }
00123         return d_this;
00124     }
00125 
00126     void register_benchmark(benchmark_info& info) {
00127         d_benchmarks.push_back(info);
00128     }
00129 
00130     std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; }
00131 };
00132 
00133 /*
00134  * The auto_register class is a helper used to register benchmarks.
00135  */
00136 class auto_register {
00137 public:
00138     auto_register(const std::string& name, std::function<void(context*)> func) {
00139         benchmark_info info(name, func);
00140         registration::get_ptr()->register_benchmark(info);
00141     }
00142 };
00143 
// Token-pasting helpers. CONCAT2 adds one level of indirection so that an argument such as __LINE__ is macro-expanded
// before CONCAT pastes the two tokens together.
#define CONCAT(x, y) x ## y
#define CONCAT2(x, y) CONCAT(x, y)

// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
// registration class. It expands to the definition of a benchpress::auto_register object constructed from (x), the
// benchmark name, and (f), the benchmark function. The object is named register_<line number> and the expansion
// already ends in a semicolon. Because the name is derived from __LINE__, two BENCHMARK uses on the same source line
// in the same scope produce the same identifier.
#define BENCHMARK(x, f) benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));

// This macro is meant to prevent the compiler from removing a redundant code path which has no side-effects. It
// expands to a block containing an empty asm statement.
// NOTE(review): how effective an empty asm statement is as an optimization barrier depends on the compiler.
#define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }
00153 
/*
 * The result class holds the outcome of one benchmark run (iteration count, total duration and bytes processed per
 * iteration) and produces a printable string representation of it.
 */
class result {
    size_t                   d_num_iterations;  // number of iterations that were timed
    std::chrono::nanoseconds d_duration;        // total measured time for all iterations
    size_t                   d_num_bytes;       // bytes processed per iteration, 0 if not reported

public:
    result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
        : d_num_iterations(num_iterations)
        , d_duration(duration)
        , d_num_bytes(num_bytes)
    {}

    // Average time per iteration in nanoseconds (truncated). Returns 0 when there were no iterations or when the
    // duration is not positive.
    size_t get_ns_per_op() const {
        if (d_num_iterations == 0 || d_duration.count() <= 0) {
            return 0;
        }
        return static_cast<size_t>(d_duration.count()) / d_num_iterations;
    }

    // Throughput in megabytes (10^6 bytes) per second. Returns 0 when no iterations ran, the duration is not positive
    // or no byte count was reported.
    double get_mb_per_s() const {
        if (d_num_iterations == 0 || d_duration.count() <= 0 || d_num_bytes == 0) {
            return 0;
        }
        // Convert to fractional seconds. Truncating to whole seconds would yield a division by zero for runs shorter
        // than one second and an inflated throughput for every run that is not a whole number of seconds.
        const double seconds = std::chrono::duration<double>(d_duration).count();
        const double megabytes = double(d_num_bytes) * double(d_num_iterations) / 1e6;
        return megabytes / seconds;
    }

    // Formats the result as right-aligned, 12 character wide columns: the iteration count, the time per iteration
    // followed by " ns/op" and, only when the throughput is positive, the throughput followed by " MB/s".
    std::string to_string() const {
        std::ostringstream out;
        out << std::setw(12) << std::right << d_num_iterations;
        out << std::setw(12) << std::right << get_ns_per_op() << " ns/op";
        const double mbs = get_mb_per_s();
        if (mbs > 0.0) {
            out << std::setw(12) << std::right << mbs << " MB/s";
        }
        return out.str();
    }
};
00196 
/*
 * parallel_context hands out the iterations of a parallel benchmark through a shared atomic counter, so that
 * several threads can pull work from the same object.
 */
class parallel_context {
public:
    // Starts the counter at the number of iterations that remain to be handed out.
    parallel_context(size_t num_iterations)
        : d_num_iterations(num_iterations)
    {}

    // Atomically takes one iteration off the counter. The answer is true as long as the counter was still positive
    // before this call took its share.
    bool next() {
        const auto before = d_num_iterations.fetch_sub(1);
        return before > 0;
    }

private:
    std::atomic_intmax_t d_num_iterations;
};
00211 
00212 /*
00213  * The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
00214  */
00215 class context {
00216     bool                                           d_timer_on;
00217     std::chrono::high_resolution_clock::time_point d_start;
00218     std::chrono::nanoseconds                       d_duration;
00219     std::chrono::seconds                           d_benchtime;
00220     size_t                                         d_num_iterations;
00221     size_t                                         d_num_threads;
00222     size_t                                         d_num_bytes;
00223     benchmark_info                                 d_benchmark;
00224 
00225 public:
00226     context(const benchmark_info& info, const options& opts)
00227         : d_timer_on(false)
00228         , d_start()
00229         , d_duration()
00230         , d_benchtime(std::chrono::seconds(opts.get_benchtime()))
00231         , d_num_iterations(1)
00232         , d_num_threads(opts.get_cpu())
00233         , d_num_bytes(0)
00234         , d_benchmark(info)
00235     {}
00236 
00237     size_t num_iterations() const { return d_num_iterations; }
00238 
00239     void set_num_threads(size_t n) { d_num_threads = n; }
00240     size_t num_threads() const { return d_num_threads; }
00241 
00242     void start_timer() {
00243         if (!d_timer_on) {
00244             d_start = std::chrono::high_resolution_clock::now();
00245             d_timer_on = true;
00246         }
00247     }
00248     void stop_timer() {
00249         if (d_timer_on) {
00250             d_duration += std::chrono::high_resolution_clock::now() - d_start;
00251             d_timer_on = false;
00252         }
00253     }
00254     void reset_timer() {
00255         if (d_timer_on) {
00256             d_start = std::chrono::high_resolution_clock::now();
00257         }
00258         d_duration = std::chrono::nanoseconds::zero();
00259     }
00260 
00261     void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
00262 
00263     size_t get_ns_per_op() {
00264         if (d_num_iterations <= 0) {
00265             return 0;
00266         }
00267         return d_duration.count() / d_num_iterations;
00268     }
00269 
00270     void run_n(size_t n) {
00271         d_num_iterations = n;
00272         reset_timer();
00273         start_timer();
00274         d_benchmark.get_func()(this);
00275         stop_timer();
00276     }
00277 
00278     void run_parallel(std::function<void(parallel_context*)> f) {
00279         parallel_context pc(d_num_iterations);
00280         std::vector<std::thread> threads;
00281         for (size_t i = 0; i < d_num_threads; ++i) {
00282             threads.push_back(std::thread([&pc,&f]() -> void {
00283                 f(&pc);
00284             }));
00285         }
00286         for(auto& thread : threads){
00287             thread.join();
00288         }
00289     }
00290 
00291     result run() {
00292         size_t n = 1;
00293         run_n(n);
00294         while (d_duration < d_benchtime && n < 1e9) {
00295             size_t last = n;
00296             if (get_ns_per_op() == 0) {
00297                 n = 1e9;
00298             } else {
00299                 n = d_duration.count() / get_ns_per_op();
00300             }
00301             n = std::max(std::min(n+n/2, 100*last), last+1);
00302             n = round_up(n);
00303             run_n(n);
00304         }
00305         return result(n, d_duration, d_num_bytes);
00306     }
00307 
00308 private:
00309     template<typename T>
00310     T round_down_10(T n) {
00311         int tens = 0;
00312         while (n > 10) {
00313             n /= 10;
00314             tens++;
00315         }
00316         int result = 1;
00317         for (int i = 0; i < tens; ++i) {
00318             result *= 10;
00319         }
00320         return result;
00321     }
00322 
00323     template<typename T>
00324     T round_up(T n) {
00325         T base = round_down_10(n);
00326         if (n < (2 * base)) {
00327             return 2 * base;
00328         }
00329         if (n < (5 * base)) {
00330             return 5 * base;
00331         }
00332         return 10 * base;
00333     }
00334 };
00335 
00336 /*
00337  * The run_benchmarks function will run the registered benchmarks.
00338  */
00339 void run_benchmarks(const options& opts) {
00340     std::regex match_r(opts.get_bench());
00341     auto benchmarks = registration::get_ptr()->get_benchmarks();
00342     for (auto& info : benchmarks) {
00343         if (std::regex_match(info.get_name(), match_r)) {
00344             context c(info, opts);
00345             auto r = c.run();
00346             std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
00347         }
00348     }
00349 }
00350 
00351 } // namespace benchpress
00352 
/*
 * If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
 * command-line parser and then executes run_benchmarks. The same section also defines the static pointer of the
 * registration class, so BENCHPRESS_CONFIG_MAIN must be defined in exactly one translation unit.
 *
 * Exit status: 0 after running the benchmarks or printing the help text, 1 when the command line cannot be parsed.
 */
#ifdef BENCHPRESS_CONFIG_MAIN
#include "cxxopts.hpp"
benchpress::registration* benchpress::registration::d_this = nullptr;
int main(int argc, char** argv) {
    const std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
    benchpress::options bench_opts;
    try {
        cxxopts::Options cmd_opts(argv[0], " - command line options");
        cmd_opts.add_options()
            ("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
                ->default_value(".*"))
            ("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
                ->default_value("1"))
            ("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
                ->default_value(std::to_string(std::thread::hardware_concurrency())))
            ("help", "print help")
        ;
        cmd_opts.parse(argc, argv);
        if (cmd_opts.count("help")) {
            std::cout << cmd_opts.help({""}) << std::endl;
            // Returning (rather than calling exit) runs the destructors of the local objects and does not depend on
            // <cstdlib>, which this header does not include.
            return 0;
        }
        // Only options that were given on the command line override the defaults of benchpress::options.
        if (cmd_opts.count("bench")) {
            bench_opts.bench(cmd_opts["bench"].as<std::string>());
        }
        if (cmd_opts.count("benchtime")) {
            bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
        }
        if (cmd_opts.count("cpu")) {
            bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
        }
    } catch (const cxxopts::OptionException& e) {
        // Diagnostics go to the error stream so that they do not mix with benchmark output on stdout.
        std::cerr << "error parsing options: " << e.what() << std::endl;
        return 1;
    }
    benchpress::run_benchmarks(bench_opts);
    // Report the total wall-clock time of the program in seconds.
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::high_resolution_clock::now() - bp_start
    ).count() / 1000.f;
    std::cout << argv[0] << " " << duration << "s" << std::endl;
    return 0;
}
#endif
00400 
00401 #endif // BENCHPRESS_HPP


rc_visard_driver
Author(s): Heiko Hirschmueller, Christian Emmerich, Felix Ruess
autogenerated on Thu Jun 6 2019 20:43:01