benchmark-blocking-sizes.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
17 
20 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
21 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
22 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
23 #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
24 #include <Eigen/Core>
25 
26 #include <bench/BenchTimer.h>
27 
28 using namespace Eigen;
29 using namespace std;
30 
32 
33 // how many times we repeat each measurement.
34 // measurements are randomly shuffled - we're not doing
35 // all N identical measurements in a row.
37 
38 // Timings below this value are too short to be accurate,
39 // we'll repeat measurements with more iterations until
40 // we get a timing above that threshold.
41 const float min_accurate_time = 1e-2f;
42 
43 // See --min-working-set-size command line parameter.
45 
46 float max_clock_speed = 0.0f;
47 
48 // range of sizes that we will benchmark (in all 3 K,M,N dimensions)
49 const size_t maxsize = 2048;
50 const size_t minsize = 16;
51 
52 typedef MatrixXf MatrixType;
55 
56 static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
57 static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
58 static_assert(maxsize > minsize, "maxsize must be larger than minsize");
59 static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
60 
// Just a helper to store a triple of K,M,N sizes for a matrix product.
//
// Besides the plain (k, m, n) form, a triple can be decoded from the 12-bit
// "compact" form in which each size is stored as its log2 on 4 bits:
// bits 8-11 hold log2(k), bits 4-7 hold log2(m), bits 0-3 hold log2(n).
struct size_triple_t
{
  size_t k, m, n;

  size_triple_t() : k(0), m(0), n(0) {}
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}

  // Decode the compact form. The shifts are done on size_t so the result
  // type matches the members instead of going through int.
  size_triple_t(uint16_t compact)
    : k(size_t(1) << ((compact >> 8) & 0xf))
    , m(size_t(1) << ((compact >> 4) & 0xf))
    , n(size_t(1) << ((compact >> 0) & 0xf))
  {}
};
75 
// Returns the base-2 logarithm of x, rounded down. For a power of two this
// is exactly its exponent. Both x == 0 and x == 1 yield 0.
uint8_t log2_pot(size_t x)
{
  uint8_t exponent = 0;
  // Count how many times x can be halved before it reaches zero.
  for (x >>= 1; x != 0; x >>= 1) {
    ++exponent;
  }
  return exponent;
}
81 
82 // Convert between size tripes and a compact form fitting in 12 bits
83 // where each size, which must be a POT, is encoded as its log2, on 4 bits
84 // so the largest representable size is 2^15 == 32k ... big enough.
85 uint16_t compact_size_triple(size_t k, size_t m, size_t n)
86 {
87  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
88 }
89 
91 {
92  return compact_size_triple(t.k, t.m, t.n);
93 }
94 
95 // A single benchmark. Initially only contains benchmark params.
96 // Then call run(), which stores the result in the gflops field.
98 {
102  float gflops;
104  : compact_product_size(0)
105  , compact_block_size(0)
106  , use_default_block_size(false)
107  , gflops(0)
108  {
109  }
110  benchmark_t(size_t pk, size_t pm, size_t pn,
111  size_t bk, size_t bm, size_t bn)
112  : compact_product_size(compact_size_triple(pk, pm, pn))
113  , compact_block_size(compact_size_triple(bk, bm, bn))
114  , use_default_block_size(false)
115  , gflops(0)
116  {}
117  benchmark_t(size_t pk, size_t pm, size_t pn)
118  : compact_product_size(compact_size_triple(pk, pm, pn))
119  , compact_block_size(0)
120  , use_default_block_size(true)
121  , gflops(0)
122  {}
123 
124  void run();
125 };
126 
127 ostream& operator<<(ostream& s, const benchmark_t& b)
128 {
129  s << hex << b.compact_product_size << dec;
130  if (b.use_default_block_size) {
132  Index k = t.k, m = t.m, n = t.n;
133  internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
134  s << " default(" << k << ", " << m << ", " << n << ")";
135  } else {
136  s << " " << hex << b.compact_block_size << dec;
137  }
138  s << " " << b.gflops;
139  return s;
140 }
141 
142 // We sort first by increasing benchmark parameters,
143 // then by decreasing performance.
144 bool operator<(const benchmark_t& b1, const benchmark_t& b2)
145 {
150  b1.gflops > b2.gflops))));
151 }
152 
154 {
155  size_triple_t productsizes(compact_product_size);
156 
157  if (use_default_block_size) {
159  } else {
160  // feed eigen with our custom blocking params
162  size_triple_t blocksizes(compact_block_size);
163  eigen_block_size_k = blocksizes.k;
164  eigen_block_size_m = blocksizes.m;
165  eigen_block_size_n = blocksizes.n;
166  }
167 
168  // set up the matrix pool
169 
170  const size_t combined_three_matrices_sizes =
171  sizeof(Scalar) *
172  (productsizes.k * productsizes.m +
173  productsizes.k * productsizes.n +
174  productsizes.m * productsizes.n);
175 
176  // 64 M is large enough that nobody has a cache bigger than that,
177  // while still being small enough that everybody has this much RAM,
178  // so conveniently we don't need to special-case platforms here.
179  const size_t unlikely_large_cache_size = 64 << 20;
180 
181  const size_t working_set_size =
182  min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
183 
184  const size_t matrix_pool_size =
185  1 + working_set_size / combined_three_matrices_sizes;
186 
187  MatrixType *lhs = new MatrixType[matrix_pool_size];
188  MatrixType *rhs = new MatrixType[matrix_pool_size];
189  MatrixType *dst = new MatrixType[matrix_pool_size];
190 
191  for (size_t i = 0; i < matrix_pool_size; i++) {
192  lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
193  rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
194  dst[i] = MatrixType::Zero(productsizes.m, productsizes.n);
195  }
196 
197  // main benchmark loop
198 
199  int iters_at_a_time = 1;
200  float time_per_iter = 0.0f;
201  size_t matrix_index = 0;
202  while (true) {
203 
204  double starttime = timer.getCpuTime();
205  for (int i = 0; i < iters_at_a_time; i++) {
206  dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
207  matrix_index++;
208  if (matrix_index == matrix_pool_size) {
209  matrix_index = 0;
210  }
211  }
212  double endtime = timer.getCpuTime();
213 
214  const float timing = float(endtime - starttime);
215 
216  if (timing >= min_accurate_time) {
217  time_per_iter = timing / iters_at_a_time;
218  break;
219  }
220 
221  iters_at_a_time *= 2;
222  }
223 
224  delete[] lhs;
225  delete[] rhs;
226  delete[] dst;
227 
228  gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
229 }
230 
232 {
233 #ifdef __linux__
234  cout << "contents of /proc/cpuinfo:" << endl;
235  string line;
236  ifstream cpuinfo("/proc/cpuinfo");
237  if (cpuinfo.is_open()) {
238  while (getline(cpuinfo, line)) {
239  cout << line << endl;
240  }
241  cpuinfo.close();
242  }
243  cout << endl;
244 #elif defined __APPLE__
245  cout << "output of sysctl hw:" << endl;
246  system("sysctl hw");
247  cout << endl;
248 #endif
249 }
250 
// Human-readable name of a scalar type, used when printing the benchmark
// parameters. This primary template is the fallback for every type that
// has no dedicated specialization.
template <typename T>
std::string type_name()
{
  return std::string("unknown");
}
256 
257 template<>
259 {
260  return "float";
261 }
262 
263 template<>
265 {
266  return "double";
267 }
268 
// Base class of the actions selectable from the command line.
//
// Derived actions override both virtual functions. The base versions
// deliberately abort, so reaching one of them signals that an override
// is missing.
struct action_t
{
  // Name the user passes as the first command-line argument to pick this action.
  virtual const char* invokation_name() const { std::abort(); return nullptr; }

  // Performs the action.
  virtual void run() const { std::abort(); }

  // Virtual so that actions can be destroyed through a base pointer.
  virtual ~action_t() = default;
};
275 
276 void show_usage_and_exit(int /*argc*/, char* argv[],
277  const vector<unique_ptr<action_t>>& available_actions)
278 {
279  cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
280  cerr << "available actions:" << endl << endl;
281  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
282  cerr << " " << (*it)->invokation_name() << endl;
283  }
284  cerr << endl;
285  cerr << "options:" << endl << endl;
286  cerr << " --min-working-set-size=N:" << endl;
287  cerr << " Set the minimum working set size to N bytes." << endl;
288  cerr << " This is rounded up as needed to a multiple of matrix size." << endl;
289  cerr << " A larger working set lowers the chance of a warm cache." << endl;
290  cerr << " The default value 0 means use a large enough working" << endl;
291  cerr << " set to likely outsize caches." << endl;
292  cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl;
293  cerr << " avoid warm caches." << endl;
294  exit(1);
295 }
296 
298 {
299  cerr << "Measuring clock speed... \r" << flush;
300 
301  vector<float> all_gflops;
302  for (int i = 0; i < 8; i++) {
303  benchmark_t b(1024, 1024, 1024);
304  b.run();
305  all_gflops.push_back(b.gflops);
306  }
307 
308  sort(all_gflops.begin(), all_gflops.end());
309  float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
310 
311  // multiply by an arbitrary constant to discourage doing anything with the
312  // returned values besides just comparing them with each other.
313  float result = stable_estimate * 123.456f;
314 
315  return result;
316 }
317 
319 {
320  int seconds;
321  human_duration_t(int s) : seconds(s) {}
322 };
323 
324 ostream& operator<<(ostream& s, const human_duration_t& d)
325 {
326  int remainder = d.seconds;
327  if (remainder > 3600) {
328  int hours = remainder / 3600;
329  s << hours << " h ";
330  remainder -= hours * 3600;
331  }
332  if (remainder > 60) {
333  int minutes = remainder / 60;
334  s << minutes << " min ";
335  remainder -= minutes * 60;
336  }
337  if (d.seconds < 600) {
338  s << remainder << " s";
339  }
340  return s;
341 }
342 
343 const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
344 
345 void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
346 {
347  FILE* file = fopen(filename, "w");
348  if (!file) {
349  cerr << "Could not open file " << filename << " for writing." << endl;
350  cerr << "Do you have write permissions on the current working directory?" << endl;
351  exit(1);
352  }
353  size_t benchmarks_vector_size = benchmarks.size();
354  fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file);
355  fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file);
356  fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file);
357  fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
358  fclose(file);
359 }
360 
361 bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
362 {
363  FILE* file = fopen(filename, "r");
364  if (!file) {
365  return false;
366  }
367  if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) {
368  return false;
369  }
370  size_t benchmarks_vector_size = 0;
371  if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) {
372  return false;
373  }
374  if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) {
375  return false;
376  }
377  benchmarks.resize(benchmarks_vector_size);
378  if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) {
379  return false;
380  }
381  unlink(filename);
382  return true;
383 }
384 
386  vector<benchmark_t>& benchmarks,
387  double time_start,
388  size_t& first_benchmark_to_run)
389 {
390  if (first_benchmark_to_run == benchmarks.size()) {
391  return;
392  }
393 
394  double time_last_progress_update = 0;
395  double time_last_clock_speed_measurement = 0;
396  double time_now = 0;
397 
398  size_t benchmark_index = first_benchmark_to_run;
399 
400  while (true) {
401  float ratio_done = float(benchmark_index) / benchmarks.size();
402  time_now = timer.getRealTime();
403 
404  // We check clock speed every minute and at the end.
405  if (benchmark_index == benchmarks.size() ||
406  time_now > time_last_clock_speed_measurement + 60.0f)
407  {
408  time_last_clock_speed_measurement = time_now;
409 
410  // Ensure that clock speed is as expected
411  float current_clock_speed = measure_clock_speed();
412 
413  // The tolerance needs to be smaller than the relative difference between
414  // clock speeds that a device could operate under.
415  // It seems unlikely that a device would be throttling clock speeds by
416  // amounts smaller than 2%.
417  // With a value of 1%, I was getting within noise on a Sandy Bridge.
418  const float clock_speed_tolerance = 0.02f;
419 
420  if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
421  // Clock speed is now higher than we previously measured.
422  // Either our initial measurement was inaccurate, which won't happen
423  // too many times as we are keeping the best clock speed value and
424  // allowing some tolerance; or something really weird happened,
425  // which invalidates all benchmark results collected so far.
426  // Either way, we better restart all over again now.
427  if (benchmark_index) {
428  cerr << "Restarting at " << 100.0f * ratio_done
429  << " % because clock speed increased. " << endl;
430  }
431  max_clock_speed = current_clock_speed;
432  first_benchmark_to_run = 0;
433  return;
434  }
435 
436  bool rerun_last_tests = false;
437 
438  if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
439  cerr << "Measurements completed so far: "
440  << 100.0f * ratio_done
441  << " % " << endl;
442  cerr << "Clock speed seems to be only "
443  << current_clock_speed/max_clock_speed
444  << " times what it used to be." << endl;
445 
446  unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
447 
448  while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
449  if (seconds_to_sleep_if_lower_clock_speed > 32) {
450  cerr << "Sleeping longer probably won't make a difference." << endl;
451  cerr << "Serializing benchmarks to " << session_filename << endl;
452  serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
453  cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
454  exit(2);
455  }
456  rerun_last_tests = true;
457  cerr << "Sleeping "
458  << seconds_to_sleep_if_lower_clock_speed
459  << " s... \r" << endl;
460  sleep(seconds_to_sleep_if_lower_clock_speed);
461  current_clock_speed = measure_clock_speed();
462  seconds_to_sleep_if_lower_clock_speed *= 2;
463  }
464  }
465 
466  if (rerun_last_tests) {
467  cerr << "Redoing the last "
468  << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
469  << " % because clock speed had been low. " << endl;
470  return;
471  }
472 
473  // nothing wrong with the clock speed so far, so there won't be a need to rerun
474  // benchmarks run so far in case we later encounter a lower clock speed.
475  first_benchmark_to_run = benchmark_index;
476  }
477 
478  if (benchmark_index == benchmarks.size()) {
479  // We're done!
480  first_benchmark_to_run = benchmarks.size();
481  // Erase progress info
482  cerr << " " << endl;
483  return;
484  }
485 
486  // Display progress info on stderr
487  if (time_now > time_last_progress_update + 1.0f) {
488  time_last_progress_update = time_now;
489  cerr << "Measurements... " << 100.0f * ratio_done
490  << " %, ETA "
491  << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
492  << " \r" << flush;
493  }
494 
495  // This is where we actually run a benchmark!
496  benchmarks[benchmark_index].run();
497  benchmark_index++;
498  }
499 }
500 
501 void run_benchmarks(vector<benchmark_t>& benchmarks)
502 {
503  size_t first_benchmark_to_run;
504  vector<benchmark_t> deserialized_benchmarks;
505  bool use_deserialized_benchmarks = false;
506  if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
507  cerr << "Found serialized session with "
508  << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
509  << " % already done" << endl;
510  if (deserialized_benchmarks.size() == benchmarks.size() &&
511  first_benchmark_to_run > 0 &&
512  first_benchmark_to_run < benchmarks.size())
513  {
514  use_deserialized_benchmarks = true;
515  }
516  }
517 
518  if (use_deserialized_benchmarks) {
519  benchmarks = deserialized_benchmarks;
520  } else {
521  // not using deserialized benchmarks, starting from scratch
522  first_benchmark_to_run = 0;
523 
524  // Randomly shuffling benchmarks allows us to get accurate enough progress info,
525  // as now the cheap/expensive benchmarks are randomly mixed so they average out.
526  // It also means that if data is corrupted for some time span, the odds are that
527  // not all repetitions of a given benchmark will be corrupted.
528  random_shuffle(benchmarks.begin(), benchmarks.end());
529  }
530 
531  for (int i = 0; i < 4; i++) {
533  }
534 
535  double time_start = 0.0;
536  while (first_benchmark_to_run < benchmarks.size()) {
537  if (first_benchmark_to_run == 0) {
538  time_start = timer.getRealTime();
539  }
540  try_run_some_benchmarks(benchmarks,
541  time_start,
542  first_benchmark_to_run);
543  }
544 
545  // Sort timings by increasing benchmark parameters, and decreasing gflops.
546  // The latter is very important. It means that we can ignore all but the first
547  // benchmark with given parameters.
548  sort(benchmarks.begin(), benchmarks.end());
549 
550  // Collect best (i.e. now first) results for each parameter values.
551  vector<benchmark_t> best_benchmarks;
552  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
553  if (best_benchmarks.empty() ||
554  best_benchmarks.back().compact_product_size != it->compact_product_size ||
555  best_benchmarks.back().compact_block_size != it->compact_block_size)
556  {
557  best_benchmarks.push_back(*it);
558  }
559  }
560 
561  // keep and return only the best benchmarks
562  benchmarks = best_benchmarks;
563 }
564 
566 {
567  virtual const char* invokation_name() const { return "all-pot-sizes"; }
568  virtual void run() const
569  {
570  vector<benchmark_t> benchmarks;
571  for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
572  for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
573  for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
574  for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
575  for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
576  for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
577  for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
578  benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
579  }
580  }
581  }
582  }
583  }
584  }
585  }
586 
587  run_benchmarks(benchmarks);
588 
589  cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
590  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
591  cout << *it << endl;
592  }
593  }
594 };
595 
597 {
598  virtual const char* invokation_name() const { return "default-sizes"; }
599  virtual void run() const
600  {
601  vector<benchmark_t> benchmarks;
602  for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
603  for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
604  for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
605  for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
606  benchmarks.emplace_back(ksize, msize, nsize);
607  }
608  }
609  }
610  }
611 
612  run_benchmarks(benchmarks);
613 
614  cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
615  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
616  cout << *it << endl;
617  }
618  }
619 };
620 
621 int main(int argc, char* argv[])
622 {
623  double time_start = timer.getRealTime();
624  cout.precision(4);
625  cerr.precision(4);
626 
627  vector<unique_ptr<action_t>> available_actions;
628  available_actions.emplace_back(new measure_all_pot_sizes_action_t);
629  available_actions.emplace_back(new measure_default_sizes_action_t);
630 
631  auto action = available_actions.end();
632 
633  if (argc <= 1) {
634  show_usage_and_exit(argc, argv, available_actions);
635  }
636  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
637  if (!strcmp(argv[1], (*it)->invokation_name())) {
638  action = it;
639  break;
640  }
641  }
642 
643  if (action == available_actions.end()) {
644  show_usage_and_exit(argc, argv, available_actions);
645  }
646 
647  for (int i = 2; i < argc; i++) {
648  if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
649  const char* equals_sign = strchr(argv[i], '=');
650  min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
651  } else {
652  cerr << "unrecognized option: " << argv[i] << endl << endl;
653  show_usage_and_exit(argc, argv, available_actions);
654  }
655  }
656 
657  print_cpuinfo();
658 
659  cout << "benchmark parameters:" << endl;
660  cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
661  cout << "scalar type: " << type_name<Scalar>() << endl;
662  cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
663  cout << "minsize = " << minsize << endl;
664  cout << "maxsize = " << maxsize << endl;
665  cout << "measurement_repetitions = " << measurement_repetitions << endl;
666  cout << "min_accurate_time = " << min_accurate_time << endl;
667  cout << "min_working_set_size = " << min_working_set_size;
668  if (min_working_set_size == 0) {
669  cout << " (try to outsize caches)";
670  }
671  cout << endl << endl;
672 
673  (*action)->run();
674 
675  double time_end = timer.getRealTime();
676  cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
677 }
void run_benchmarks(vector< benchmark_t > &benchmarks)
Matrix3f m
internal::packet_traits< Scalar >::type Packet
unsigned char uint8_t
Definition: ms_stdint.h:83
const int measurement_repetitions
const char session_filename[]
string type_name()
size_t min_working_set_size
Scalar * b
Definition: benchVecAdd.cpp:17
void show_usage_and_exit(int, char *argv[], const vector< unique_ptr< action_t >> &available_actions)
double getCpuTime() const
Definition: BenchTimer.h:130
bool operator<(const benchmark_t &b1, const benchmark_t &b2)
size_triple_t(uint16_t compact)
size_triple_t(const size_triple_t &o)
int n
bool deserialize_benchmarks(const char *filename, vector< benchmark_t > &benchmarks, size_t &first_benchmark_to_run)
Vector2 b2(4, -5)
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
CleanedUpDerType< DerType >::type() max(const AutoDiffScalar< DerType > &x, const T &y)
int eigen_block_size_m
Definition: BFloat16.h:88
MatrixXf MatrixType
unsigned short uint16_t
Definition: ms_stdint.h:84
uint8_t log2_pot(size_t x)
virtual void run() const
void print_cpuinfo()
const size_t minsize
benchmark_t(size_t pk, size_t pm, size_t pn)
const size_t maxsize
virtual const char * invokation_name() const
MatrixType::Scalar Scalar
virtual const char * invokation_name() const
double getRealTime() const
Definition: BenchTimer.h:145
void try_run_some_benchmarks(vector< benchmark_t > &benchmarks, double time_start, size_t &first_benchmark_to_run)
static const Line3 l(Rot3(), 1, 1)
Values result
bool eigen_use_specific_block_size
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
size_triple_t(size_t _k, size_t _m, size_t _n)
Array< double, 1, 3 > e(1./3., 0.5, 2.)
RealScalar s
int eigen_block_size_k
float measure_clock_speed()
benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
const float min_accurate_time
float max_clock_speed
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
string type_name< double >()
static BenchTimer timer
string type_name< float >()
std::ostream & operator<<(std::ostream &os, const DSizes< IndexType, NumDims > &dims)
int main(int argc, char *argv[])
int eigen_block_size_n
Point2 t(10, 10)
virtual const char * invokation_name() const
void serialize_benchmarks(const char *filename, const vector< benchmark_t > &benchmarks, size_t first_benchmark_to_run)
Vector2 b1(2, -1)


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:33:57