benchmark-blocking-sizes.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <vector>
17 
// Globals that the EIGEN_TEST_SPECIFIC_BLOCKING_* macros below expand to.
// benchmark_t::run() assigns them before each product so that a benchmark
// can impose its own blocking sizes. They have to be declared before
// <Eigen/Core> is included, since the macros are defined ahead of that include.
bool eigen_use_specific_block_size;
int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
24 #include <Eigen/Core>
25 
26 #include <bench/BenchTimer.h>
27 
28 using namespace Eigen;
29 using namespace std;
30 
32 
// how many times we repeat each measurement.
// measurements are randomly shuffled - we're not doing
// all N identical measurements in a row.
// NOTE(review): the value 3 is assumed to match upstream Eigen - confirm.
const int measurement_repetitions = 3;

38 // Timings below this value are too short to be accurate,
39 // we'll repeat measurements with more iterations until
40 // we get a timing above that threshold.
41 const float min_accurate_time = 1e-2f;
42 
// See --min-working-set-size command line parameter.
// The default of 0 means "use a working set large enough to outsize caches".
size_t min_working_set_size = 0;

46 float max_clock_speed = 0.0f;
47 
48 // range of sizes that we will benchmark (in all 3 K,M,N dimensions)
49 const size_t maxsize = 2048;
50 const size_t minsize = 16;
51 
52 typedef MatrixXf MatrixType;
55 
56 static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
57 static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
58 static_assert(maxsize > minsize, "maxsize must be larger than minsize");
59 static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
60 
61 // just a helper to store a triple of K,M,N sizes for matrix product
struct size_triple_t
{
  size_t k, m, n;

  // All-zero triple.
  size_triple_t() : k(0), m(0), n(0) {}

  // Triple from explicit sizes.
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}

  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}

  // Decodes the compact 12-bit form: each 4-bit nibble holds the log2 of one
  // size, K in bits 8-11, M in bits 4-7 and N in bits 0-3.
  size_triple_t(uint16_t compact)
  {
    k = size_t(1) << ((compact & 0xf00) >> 8);
    m = size_t(1) << ((compact & 0x0f0) >> 4);
    n = size_t(1) << ((compact & 0x00f) >> 0);
  }
};
75 
// Returns floor(log2(x)); for the power-of-two sizes used in this file that
// is the exact exponent. Inputs 0 and 1 both yield 0.
uint8_t log2_pot(size_t x) {
  size_t exponent = 0;
  for (x >>= 1; x != 0; x >>= 1) {
    ++exponent;
  }
  return exponent;
}
81 
// Convert between size triples and a compact form fitting in 12 bits
// where each size, which must be a POT, is encoded as its log2, on 4 bits
// so the largest representable size is 2^15 == 32k ... big enough.
85 uint16_t compact_size_triple(size_t k, size_t m, size_t n)
86 {
87  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
88 }
89 
91 {
92  return compact_size_triple(t.k, t.m, t.n);
93 }
94 
95 // A single benchmark. Initially only contains benchmark params.
96 // Then call run(), which stores the result in the gflops field.
98 {
102  float gflops;
104  : compact_product_size(0)
105  , compact_block_size(0)
106  , use_default_block_size(false)
107  , gflops(0)
108  {
109  }
110  benchmark_t(size_t pk, size_t pm, size_t pn,
111  size_t bk, size_t bm, size_t bn)
112  : compact_product_size(compact_size_triple(pk, pm, pn))
113  , compact_block_size(compact_size_triple(bk, bm, bn))
114  , use_default_block_size(false)
115  , gflops(0)
116  {}
117  benchmark_t(size_t pk, size_t pm, size_t pn)
118  : compact_product_size(compact_size_triple(pk, pm, pn))
119  , compact_block_size(0)
120  , use_default_block_size(true)
121  , gflops(0)
122  {}
123 
124  void run();
125 };
126 
127 ostream& operator<<(ostream& s, const benchmark_t& b)
128 {
129  s << hex << b.compact_product_size << dec;
130  if (b.use_default_block_size) {
131  size_triple_t t(b.compact_product_size);
132  Index k = t.k, m = t.m, n = t.n;
133  internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
134  s << " default(" << k << ", " << m << ", " << n << ")";
135  } else {
136  s << " " << hex << b.compact_block_size << dec;
137  }
138  s << " " << b.gflops;
139  return s;
140 }
141 
142 // We sort first by increasing benchmark parameters,
143 // then by decreasing performance.
144 bool operator<(const benchmark_t& b1, const benchmark_t& b2)
145 {
146  return b1.compact_product_size < b2.compact_product_size ||
147  (b1.compact_product_size == b2.compact_product_size && (
148  (b1.compact_block_size < b2.compact_block_size || (
149  b1.compact_block_size == b2.compact_block_size &&
150  b1.gflops > b2.gflops))));
151 }
152 
154 {
155  size_triple_t productsizes(compact_product_size);
156 
157  if (use_default_block_size) {
159  } else {
160  // feed eigen with our custom blocking params
162  size_triple_t blocksizes(compact_block_size);
163  eigen_block_size_k = blocksizes.k;
164  eigen_block_size_m = blocksizes.m;
165  eigen_block_size_n = blocksizes.n;
166  }
167 
168  // set up the matrix pool
169 
170  const size_t combined_three_matrices_sizes =
171  sizeof(Scalar) *
172  (productsizes.k * productsizes.m +
173  productsizes.k * productsizes.n +
174  productsizes.m * productsizes.n);
175 
176  // 64 M is large enough that nobody has a cache bigger than that,
177  // while still being small enough that everybody has this much RAM,
178  // so conveniently we don't need to special-case platforms here.
179  const size_t unlikely_large_cache_size = 64 << 20;
180 
181  const size_t working_set_size =
182  min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
183 
184  const size_t matrix_pool_size =
185  1 + working_set_size / combined_three_matrices_sizes;
186 
187  MatrixType *lhs = new MatrixType[matrix_pool_size];
188  MatrixType *rhs = new MatrixType[matrix_pool_size];
189  MatrixType *dst = new MatrixType[matrix_pool_size];
190 
191  for (size_t i = 0; i < matrix_pool_size; i++) {
192  lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
193  rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
194  dst[i] = MatrixType::Zero(productsizes.m, productsizes.n);
195  }
196 
197  // main benchmark loop
198 
199  int iters_at_a_time = 1;
200  float time_per_iter = 0.0f;
201  size_t matrix_index = 0;
202  while (true) {
203 
204  double starttime = timer.getCpuTime();
205  for (int i = 0; i < iters_at_a_time; i++) {
206  dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
207  matrix_index++;
208  if (matrix_index == matrix_pool_size) {
209  matrix_index = 0;
210  }
211  }
212  double endtime = timer.getCpuTime();
213 
214  const float timing = float(endtime - starttime);
215 
216  if (timing >= min_accurate_time) {
217  time_per_iter = timing / iters_at_a_time;
218  break;
219  }
220 
221  iters_at_a_time *= 2;
222  }
223 
224  delete[] lhs;
225  delete[] rhs;
226  delete[] dst;
227 
228  gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
229 }
230 
232 {
233 #ifdef __linux__
234  cout << "contents of /proc/cpuinfo:" << endl;
235  string line;
236  ifstream cpuinfo("/proc/cpuinfo");
237  if (cpuinfo.is_open()) {
238  while (getline(cpuinfo, line)) {
239  cout << line << endl;
240  }
241  cpuinfo.close();
242  }
243  cout << endl;
244 #elif defined __APPLE__
245  cout << "output of sysctl hw:" << endl;
246  system("sysctl hw");
247  cout << endl;
248 #endif
249 }
250 
// Human-readable name of a scalar type, printed with the benchmark
// parameters. Types without a specialization report "unknown".
template <typename T>
std::string type_name()
{
  return "unknown";
}

template<>
std::string type_name<float>()
{
  return "float";
}

template<>
std::string type_name<double>()
{
  return "double";
}
268 
// Base class of the actions selectable on the command line. The default
// implementations abort, so a concrete action has to override both methods.
struct action_t
{
  virtual ~action_t() {}

  // Runs the action.
  virtual void run() const { abort(); }

  // Name by which the action is selected on the command line.
  virtual const char* invokation_name() const
  {
    abort();
    return nullptr;
  }
};
275 
276 void show_usage_and_exit(int /*argc*/, char* argv[],
277  const vector<unique_ptr<action_t>>& available_actions)
278 {
279  cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
280  cerr << "available actions:" << endl << endl;
281  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
282  cerr << " " << (*it)->invokation_name() << endl;
283  }
284  cerr << endl;
285  cerr << "options:" << endl << endl;
286  cerr << " --min-working-set-size=N:" << endl;
287  cerr << " Set the minimum working set size to N bytes." << endl;
288  cerr << " This is rounded up as needed to a multiple of matrix size." << endl;
289  cerr << " A larger working set lowers the chance of a warm cache." << endl;
290  cerr << " The default value 0 means use a large enough working" << endl;
291  cerr << " set to likely outsize caches." << endl;
292  cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl;
293  cerr << " avoid warm caches." << endl;
294  exit(1);
295 }
296 
298 {
299  cerr << "Measuring clock speed... \r" << flush;
300 
301  vector<float> all_gflops;
302  for (int i = 0; i < 8; i++) {
303  benchmark_t b(1024, 1024, 1024);
304  b.run();
305  all_gflops.push_back(b.gflops);
306  }
307 
308  sort(all_gflops.begin(), all_gflops.end());
309  float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
310 
311  // multiply by an arbitrary constant to discourage trying doing anything with the
312  // returned values besides just comparing them with each other.
313  float result = stable_estimate * 123.456f;
314 
315  return result;
316 }
317 
// Wrapper around a duration in seconds, so that streaming it prints a
// human-readable "H h M min S s" form instead of a bare number.
struct human_duration_t
{
  int seconds;
  human_duration_t(int s) : seconds(s) {}
};

// Prints hours and minutes when there is at least one of them, and the
// seconds only for durations under ten minutes, where they still matter.
std::ostream& operator<<(std::ostream& s, const human_duration_t& d)
{
  int remainder = d.seconds;
  // ">=" so that exactly one hour prints as "1 h " rather than "60 min ".
  if (remainder >= 3600) {
    int hours = remainder / 3600;
    s << hours << " h ";
    remainder -= hours * 3600;
  }
  // ">=" so that exactly one minute prints as "1 min 0 s" rather than "60 s".
  if (remainder >= 60) {
    int minutes = remainder / 60;
    s << minutes << " min ";
    remainder -= minutes * 60;
  }
  if (d.seconds < 600) {
    s << remainder << " s";
  }
  return s;
}
342 
343 const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
344 
345 void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
346 {
347  FILE* file = fopen(filename, "w");
348  if (!file) {
349  cerr << "Could not open file " << filename << " for writing." << endl;
350  cerr << "Do you have write permissions on the current working directory?" << endl;
351  exit(1);
352  }
353  size_t benchmarks_vector_size = benchmarks.size();
354  fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file);
355  fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file);
356  fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file);
357  fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
358  fclose(file);
359 }
360 
361 bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
362 {
363  FILE* file = fopen(filename, "r");
364  if (!file) {
365  return false;
366  }
367  if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) {
368  return false;
369  }
370  size_t benchmarks_vector_size = 0;
371  if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) {
372  return false;
373  }
374  if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) {
375  return false;
376  }
377  benchmarks.resize(benchmarks_vector_size);
378  if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) {
379  return false;
380  }
381  unlink(filename);
382  return true;
383 }
384 
386  vector<benchmark_t>& benchmarks,
387  double time_start,
388  size_t& first_benchmark_to_run)
389 {
390  if (first_benchmark_to_run == benchmarks.size()) {
391  return;
392  }
393 
394  double time_last_progress_update = 0;
395  double time_last_clock_speed_measurement = 0;
396  double time_now = 0;
397 
398  size_t benchmark_index = first_benchmark_to_run;
399 
400  while (true) {
401  float ratio_done = float(benchmark_index) / benchmarks.size();
402  time_now = timer.getRealTime();
403 
404  // We check clock speed every minute and at the end.
405  if (benchmark_index == benchmarks.size() ||
406  time_now > time_last_clock_speed_measurement + 60.0f)
407  {
408  time_last_clock_speed_measurement = time_now;
409 
410  // Ensure that clock speed is as expected
411  float current_clock_speed = measure_clock_speed();
412 
413  // The tolerance needs to be smaller than the relative difference between
414  // clock speeds that a device could operate under.
415  // It seems unlikely that a device would be throttling clock speeds by
416  // amounts smaller than 2%.
417  // With a value of 1%, I was getting within noise on a Sandy Bridge.
418  const float clock_speed_tolerance = 0.02f;
419 
420  if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
421  // Clock speed is now higher than we previously measured.
422  // Either our initial measurement was inaccurate, which won't happen
423  // too many times as we are keeping the best clock speed value and
424  // and allowing some tolerance; or something really weird happened,
425  // which invalidates all benchmark results collected so far.
426  // Either way, we better restart all over again now.
427  if (benchmark_index) {
428  cerr << "Restarting at " << 100.0f * ratio_done
429  << " % because clock speed increased. " << endl;
430  }
431  max_clock_speed = current_clock_speed;
432  first_benchmark_to_run = 0;
433  return;
434  }
435 
436  bool rerun_last_tests = false;
437 
438  if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
439  cerr << "Measurements completed so far: "
440  << 100.0f * ratio_done
441  << " % " << endl;
442  cerr << "Clock speed seems to be only "
443  << current_clock_speed/max_clock_speed
444  << " times what it used to be." << endl;
445 
446  unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
447 
448  while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
449  if (seconds_to_sleep_if_lower_clock_speed > 32) {
450  cerr << "Sleeping longer probably won't make a difference." << endl;
451  cerr << "Serializing benchmarks to " << session_filename << endl;
452  serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
453  cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
454  exit(2);
455  }
456  rerun_last_tests = true;
457  cerr << "Sleeping "
458  << seconds_to_sleep_if_lower_clock_speed
459  << " s... \r" << endl;
460  sleep(seconds_to_sleep_if_lower_clock_speed);
461  current_clock_speed = measure_clock_speed();
462  seconds_to_sleep_if_lower_clock_speed *= 2;
463  }
464  }
465 
466  if (rerun_last_tests) {
467  cerr << "Redoing the last "
468  << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
469  << " % because clock speed had been low. " << endl;
470  return;
471  }
472 
473  // nothing wrong with the clock speed so far, so there won't be a need to rerun
474  // benchmarks run so far in case we later encounter a lower clock speed.
475  first_benchmark_to_run = benchmark_index;
476  }
477 
478  if (benchmark_index == benchmarks.size()) {
479  // We're done!
480  first_benchmark_to_run = benchmarks.size();
481  // Erase progress info
482  cerr << " " << endl;
483  return;
484  }
485 
486  // Display progress info on stderr
487  if (time_now > time_last_progress_update + 1.0f) {
488  time_last_progress_update = time_now;
489  cerr << "Measurements... " << 100.0f * ratio_done
490  << " %, ETA "
491  << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
492  << " \r" << flush;
493  }
494 
495  // This is where we actually run a benchmark!
496  benchmarks[benchmark_index].run();
497  benchmark_index++;
498  }
499 }
500 
501 void run_benchmarks(vector<benchmark_t>& benchmarks)
502 {
503  size_t first_benchmark_to_run;
504  vector<benchmark_t> deserialized_benchmarks;
505  bool use_deserialized_benchmarks = false;
506  if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
507  cerr << "Found serialized session with "
508  << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
509  << " % already done" << endl;
510  if (deserialized_benchmarks.size() == benchmarks.size() &&
511  first_benchmark_to_run > 0 &&
512  first_benchmark_to_run < benchmarks.size())
513  {
514  use_deserialized_benchmarks = true;
515  }
516  }
517 
518  if (use_deserialized_benchmarks) {
519  benchmarks = deserialized_benchmarks;
520  } else {
521  // not using deserialized benchmarks, starting from scratch
522  first_benchmark_to_run = 0;
523 
524  // Randomly shuffling benchmarks allows us to get accurate enough progress info,
525  // as now the cheap/expensive benchmarks are randomly mixed so they average out.
526  // It also means that if data is corrupted for some time span, the odds are that
527  // not all repetitions of a given benchmark will be corrupted.
528  random_shuffle(benchmarks.begin(), benchmarks.end());
529  }
530 
531  for (int i = 0; i < 4; i++) {
533  }
534 
535  double time_start = 0.0;
536  while (first_benchmark_to_run < benchmarks.size()) {
537  if (first_benchmark_to_run == 0) {
538  time_start = timer.getRealTime();
539  }
540  try_run_some_benchmarks(benchmarks,
541  time_start,
542  first_benchmark_to_run);
543  }
544 
545  // Sort timings by increasing benchmark parameters, and decreasing gflops.
546  // The latter is very important. It means that we can ignore all but the first
547  // benchmark with given parameters.
548  sort(benchmarks.begin(), benchmarks.end());
549 
550  // Collect best (i.e. now first) results for each parameter values.
551  vector<benchmark_t> best_benchmarks;
552  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
553  if (best_benchmarks.empty() ||
554  best_benchmarks.back().compact_product_size != it->compact_product_size ||
555  best_benchmarks.back().compact_block_size != it->compact_block_size)
556  {
557  best_benchmarks.push_back(*it);
558  }
559  }
560 
561  // keep and return only the best benchmarks
562  benchmarks = best_benchmarks;
563 }
564 
566 {
567  virtual const char* invokation_name() const { return "all-pot-sizes"; }
568  virtual void run() const
569  {
570  vector<benchmark_t> benchmarks;
571  for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
572  for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
573  for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
574  for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
575  for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
576  for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
577  for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
578  benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
579  }
580  }
581  }
582  }
583  }
584  }
585  }
586 
587  run_benchmarks(benchmarks);
588 
589  cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
590  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
591  cout << *it << endl;
592  }
593  }
594 };
595 
597 {
598  virtual const char* invokation_name() const { return "default-sizes"; }
599  virtual void run() const
600  {
601  vector<benchmark_t> benchmarks;
602  for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
603  for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
604  for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
605  for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
606  benchmarks.emplace_back(ksize, msize, nsize);
607  }
608  }
609  }
610  }
611 
612  run_benchmarks(benchmarks);
613 
614  cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
615  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
616  cout << *it << endl;
617  }
618  }
619 };
620 
621 int main(int argc, char* argv[])
622 {
623  double time_start = timer.getRealTime();
624  cout.precision(4);
625  cerr.precision(4);
626 
627  vector<unique_ptr<action_t>> available_actions;
628  available_actions.emplace_back(new measure_all_pot_sizes_action_t);
629  available_actions.emplace_back(new measure_default_sizes_action_t);
630 
631  auto action = available_actions.end();
632 
633  if (argc <= 1) {
634  show_usage_and_exit(argc, argv, available_actions);
635  }
636  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
637  if (!strcmp(argv[1], (*it)->invokation_name())) {
638  action = it;
639  break;
640  }
641  }
642 
643  if (action == available_actions.end()) {
644  show_usage_and_exit(argc, argv, available_actions);
645  }
646 
647  for (int i = 2; i < argc; i++) {
648  if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
649  const char* equals_sign = strchr(argv[i], '=');
650  min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
651  } else {
652  cerr << "unrecognized option: " << argv[i] << endl << endl;
653  show_usage_and_exit(argc, argv, available_actions);
654  }
655  }
656 
657  print_cpuinfo();
658 
659  cout << "benchmark parameters:" << endl;
660  cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
661  cout << "scalar type: " << type_name<Scalar>() << endl;
662  cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
663  cout << "minsize = " << minsize << endl;
664  cout << "maxsize = " << maxsize << endl;
665  cout << "measurement_repetitions = " << measurement_repetitions << endl;
666  cout << "min_accurate_time = " << min_accurate_time << endl;
667  cout << "min_working_set_size = " << min_working_set_size;
668  if (min_working_set_size == 0) {
669  cout << " (try to outsize caches)";
670  }
671  cout << endl << endl;
672 
673  (*action)->run();
674 
675  double time_end = timer.getRealTime();
676  cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
677 }
run_benchmarks
void run_benchmarks(vector< benchmark_t > &benchmarks)
Definition: benchmark-blocking-sizes.cpp:501
compact_size_triple
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
Definition: benchmark-blocking-sizes.cpp:85
maxsize
const size_t maxsize
Definition: benchmark-blocking-sizes.cpp:49
main
int main(int argc, char *argv[])
Definition: benchmark-blocking-sizes.cpp:621
Eigen
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
type_name< double >
string type_name< double >()
Definition: benchmark-blocking-sizes.cpp:264
max_clock_speed
float max_clock_speed
Definition: benchmark-blocking-sizes.cpp:46
simple_graph::b1
Vector2 b1(2, -1)
benchmark_t::use_default_block_size
bool use_default_block_size
Definition: benchmark-blocking-sizes.cpp:101
Eigen::operator<<
std::ostream & operator<<(std::ostream &os, const DSizes< IndexType, NumDims > &dims)
Definition: TensorDimensions.h:387
Eigen::max
CleanedUpDerType< DerType >::type() max(const AutoDiffScalar< DerType > &x, const T &y)
Definition: AutoDiffScalar.h:585
measure_default_sizes_action_t::invokation_name
virtual const char * invokation_name() const
Definition: benchmark-blocking-sizes.cpp:598
Eigen::internal::packet_traits::size
@ size
Definition: GenericPacketMath.h:112
size_triple_t::size_triple_t
size_triple_t(size_t _k, size_t _m, size_t _n)
Definition: benchmark-blocking-sizes.cpp:66
s
RealScalar s
Definition: level1_cplx_impl.h:126
e
Array< double, 1, 3 > e(1./3., 0.5, 2.)
d
static const double d[K][N]
Definition: igam.h:11
measure_all_pot_sizes_action_t::invokation_name
virtual const char * invokation_name() const
Definition: benchmark-blocking-sizes.cpp:567
size_triple_t::n
size_t n
Definition: benchmark-blocking-sizes.cpp:64
MatrixType
MatrixXf MatrixType
Definition: benchmark-blocking-sizes.cpp:52
minsize
const size_t minsize
Definition: benchmark-blocking-sizes.cpp:50
Packet
internal::packet_traits< Scalar >::type Packet
Definition: benchmark-blocking-sizes.cpp:54
timer
static BenchTimer timer
Definition: benchmark-blocking-sizes.cpp:31
b
Scalar * b
Definition: benchVecAdd.cpp:17
min_accurate_time
const float min_accurate_time
Definition: benchmark-blocking-sizes.cpp:41
simple_graph::b2
Vector2 b2(4, -5)
x
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition: gnuplot_common_settings.hh:12
Scalar
MatrixType::Scalar Scalar
Definition: benchmark-blocking-sizes.cpp:53
Eigen::BenchTimer::getCpuTime
double getCpuTime() const
Definition: BenchTimer.h:130
human_duration_t::seconds
int seconds
Definition: benchmark-blocking-sizes.cpp:320
serialize_benchmarks
void serialize_benchmarks(const char *filename, const vector< benchmark_t > &benchmarks, size_t first_benchmark_to_run)
Definition: benchmark-blocking-sizes.cpp:345
benchmark_t
Definition: benchmark-blocking-sizes.cpp:97
show_usage_and_exit
void show_usage_and_exit(int, char *argv[], const vector< unique_ptr< action_t >> &available_actions)
Definition: benchmark-blocking-sizes.cpp:276
measure_default_sizes_action_t
Definition: benchmark-blocking-sizes.cpp:596
benchmark_t::benchmark_t
benchmark_t()
Definition: benchmark-blocking-sizes.cpp:103
deserialize_benchmarks
bool deserialize_benchmarks(const char *filename, vector< benchmark_t > &benchmarks, size_t &first_benchmark_to_run)
Definition: benchmark-blocking-sizes.cpp:361
result
Values result
Definition: OdometryOptimize.cpp:8
measure_default_sizes_action_t::run
virtual void run() const
Definition: benchmark-blocking-sizes.cpp:599
action_t::run
virtual void run() const
Definition: benchmark-blocking-sizes.cpp:272
uint8_t
unsigned char uint8_t
Definition: ms_stdint.h:83
type_name
string type_name()
Definition: benchmark-blocking-sizes.cpp:252
n
int n
Definition: BiCGSTAB_simple.cpp:1
print_cpuinfo
void print_cpuinfo()
Definition: benchmark-blocking-sizes.cpp:231
min_working_set_size
size_t min_working_set_size
Definition: benchmark-blocking-sizes.cpp:44
relicense.filename
filename
Definition: relicense.py:57
eigen_use_specific_block_size
bool eigen_use_specific_block_size
Definition: benchmark-blocking-sizes.cpp:18
l
static const Line3 l(Rot3(), 1, 1)
Eigen::BenchTimer
Definition: BenchTimer.h:59
eigen_block_size_m
int eigen_block_size_m
Definition: benchmark-blocking-sizes.cpp:19
measure_clock_speed
float measure_clock_speed()
Definition: benchmark-blocking-sizes.cpp:297
measure_all_pot_sizes_action_t::run
virtual void run() const
Definition: benchmark-blocking-sizes.cpp:568
gtsam.examples.DogLegOptimizerExample.run
def run(args)
Definition: DogLegOptimizerExample.py:21
session_filename
const char session_filename[]
Definition: benchmark-blocking-sizes.cpp:343
m
Matrix3f m
Definition: AngleAxis_mimic_euler.cpp:1
size_triple_t::size_triple_t
size_triple_t(uint16_t compact)
Definition: benchmark-blocking-sizes.cpp:68
Eigen::Triplet< double >
type_name< float >
string type_name< float >()
Definition: benchmark-blocking-sizes.cpp:258
size_triple_t::n
uint16_t n
Definition: analyze-blocking-sizes.cpp:48
gtsam.examples.DogLegOptimizerExample.action
action
Definition: DogLegOptimizerExample.py:115
human_duration_t
Definition: benchmark-blocking-sizes.cpp:318
benchmark_t::compact_product_size
uint16_t compact_product_size
Definition: benchmark-blocking-sizes.cpp:99
size_triple_t::size_triple_t
size_triple_t(const size_triple_t &o)
Definition: benchmark-blocking-sizes.cpp:67
size_triple_t::m
uint16_t m
Definition: analyze-blocking-sizes.cpp:48
tree::f
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Definition: testExpression.cpp:218
operator<
bool operator<(const benchmark_t &b1, const benchmark_t &b2)
Definition: benchmark-blocking-sizes.cpp:144
benchmark_t::compact_block_size
uint16_t compact_block_size
Definition: benchmark-blocking-sizes.cpp:100
benchmark_t::gflops
float gflops
Definition: benchmark-blocking-sizes.cpp:102
Eigen::BenchTimer::getRealTime
double getRealTime() const
Definition: BenchTimer.h:145
pm
Matrix4d pm
Definition: HessenbergDecomposition_packedMatrix.cpp:4
std
Definition: BFloat16.h:88
measurement_repetitions
const int measurement_repetitions
Definition: benchmark-blocking-sizes.cpp:36
human_duration_t::human_duration_t
human_duration_t(int s)
Definition: benchmark-blocking-sizes.cpp:321
action_t
Definition: analyze-blocking-sizes.cpp:589
size_triple_t::size_triple_t
size_triple_t()
Definition: benchmark-blocking-sizes.cpp:65
uint16_t
unsigned short uint16_t
Definition: ms_stdint.h:84
BenchTimer.h
eigen_block_size_k
int eigen_block_size_k
Definition: benchmark-blocking-sizes.cpp:19
log2_pot
uint8_t log2_pot(size_t x)
Definition: benchmark-blocking-sizes.cpp:76
try_run_some_benchmarks
void try_run_some_benchmarks(vector< benchmark_t > &benchmarks, double time_start, size_t &first_benchmark_to_run)
Definition: benchmark-blocking-sizes.cpp:385
gtsam.examples.DogLegOptimizerExample.float
float
Definition: DogLegOptimizerExample.py:113
measure_all_pot_sizes_action_t
Definition: benchmark-blocking-sizes.cpp:565
benchmark_t::run
void run()
Definition: benchmark-blocking-sizes.cpp:153
size_triple_t::k
uint16_t k
Definition: analyze-blocking-sizes.cpp:48
benchmark_t::benchmark_t
benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
Definition: benchmark-blocking-sizes.cpp:110
align_3::t
Point2 t(10, 10)
action_t::~action_t
virtual ~action_t()
Definition: benchmark-blocking-sizes.cpp:273
matlab_wrap.file
file
Definition: matlab_wrap.py:57
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
size_triple_t
Definition: analyze-blocking-sizes.cpp:46
eigen_block_size_n
int eigen_block_size_n
Definition: benchmark-blocking-sizes.cpp:19
benchmark_t::benchmark_t
benchmark_t(size_t pk, size_t pm, size_t pn)
Definition: benchmark-blocking-sizes.cpp:117
action_t::invokation_name
virtual const char * invokation_name() const
Definition: benchmark-blocking-sizes.cpp:271
Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74


gtsam
Author(s):
autogenerated on Fri Nov 1 2024 03:32:00