TimeTBB.cpp
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------------
2 
3 * GTSAM Copyright 2010, Georgia Tech Research Corporation,
4 * Atlanta, Georgia 30332-0415
5 * All Rights Reserved
6 * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
7 
8 * See LICENSE for the license information
9 * -------------------------------------------------------------------------- */
10 
18 #include <gtsam/global_includes.h>
19 #include <gtsam/base/Matrix.h>
20 
21 #include <map>
22 #include <iostream>
23 
24 using namespace std;
25 using namespace gtsam;
26 
27 #ifdef GTSAM_USE_TBB
28 
29 #include <tbb/blocked_range.h> // tbb::blocked_range
30 #include <tbb/tick_count.h> // tbb::tick_count
31 #include <tbb/parallel_for.h> // tbb::parallel_for
32 #include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
33 #include <tbb/task_arena.h> // tbb::task_arena
34 #include <tbb/task_group.h> // tbb::task_group
35 
36 static const DenseIndex numberOfProblems = 1000000;
37 static const DenseIndex problemSize = 4;
38 
40 
41 /* ************************************************************************* */
/// Timings gathered for one thread count: grain size -> elapsed seconds,
/// kept separately for the non-allocating and the allocating workload.
struct ResultWithThreads
{
  using TimingMap = std::map<int, double>;
  /// (grain size, seconds) pair as stored in either timing map.
  using value_type = TimingMap::value_type;

  TimingMap grainSizesWithoutAllocation;
  TimingMap grainSizesWithAllocation;
};

/// All timings, keyed by the number of threads used for the run.
using Results = std::map<int, ResultWithThreads>;
50 
51 /* ************************************************************************* */
52 struct WorkerWithoutAllocation
53 {
54  vector<double>& results;
55 
56  WorkerWithoutAllocation(vector<double>& results) : results(results) {}
57 
58  void operator()(const tbb::blocked_range<size_t>& r) const
59  {
60  for(size_t i = r.begin(); i != r.end(); ++i)
61  {
62  FixedMatrix m1 = FixedMatrix::Random();
63  FixedMatrix m2 = FixedMatrix::Random();
64  FixedMatrix prod = m1 * m2;
65  results[i] = prod.norm();
66  }
67  }
68 };
69 
70 /* ************************************************************************* */
71 map<int, double> testWithoutMemoryAllocation(int num_threads)
72 {
73  // A function to do some matrix operations without allocating any memory
74 
75  // Create task_arena and task_group
76  tbb::task_arena arena(num_threads);
77  tbb::task_group tg;
78 
79  // Now call it
80  vector<double> results(numberOfProblems);
81 
82  const vector<size_t> grainSizes = {1, 10, 100, 1000};
83  map<int, double> timingResults;
84  for(size_t grainSize: grainSizes)
85  {
86  tbb::tick_count t0 = tbb::tick_count::now();
87 
88  // Run parallel code (as a task group) inside of task arena
89  arena.execute([&]{
90  tg.run_and_wait([&]{
91  tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
92  });
93  });
94 
95  tbb::tick_count t1 = tbb::tick_count::now();
96  cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
97  timingResults[(int)grainSize] = (t1 - t0).seconds();
98  }
99 
100  return timingResults;
101 }
102 
103 /* ************************************************************************* */
104 struct WorkerWithAllocation
105 {
106  vector<double>& results;
107 
108  WorkerWithAllocation(vector<double>& results) : results(results) {}
109 
110  void operator()(const tbb::blocked_range<size_t>& r) const
111  {
112  tbb::cache_aligned_allocator<double> allocator;
113  for(size_t i = r.begin(); i != r.end(); ++i)
114  {
115  double *m1data = allocator.allocate(problemSize * problemSize);
116  Eigen::Map<Matrix> m1(m1data, problemSize, problemSize);
117  double *m2data = allocator.allocate(problemSize * problemSize);
118  Eigen::Map<Matrix> m2(m2data, problemSize, problemSize);
119  double *proddata = allocator.allocate(problemSize * problemSize);
120  Eigen::Map<Matrix> prod(proddata, problemSize, problemSize);
121 
122  m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
123  m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
124  prod = m1 * m2;
125  results[i] = prod.norm();
126 
127  allocator.deallocate(m1data, problemSize * problemSize);
128  allocator.deallocate(m2data, problemSize * problemSize);
129  allocator.deallocate(proddata, problemSize * problemSize);
130  }
131  }
132 };
133 
134 /* ************************************************************************* */
135 map<int, double> testWithMemoryAllocation(int num_threads)
136 {
137  // A function to do some matrix operations with allocating memory
138 
139  // Create task_arena and task_group
140  tbb::task_arena arena(num_threads);
141  tbb::task_group tg;
142 
143  // Now call it
144  vector<double> results(numberOfProblems);
145 
146  const vector<size_t> grainSizes = {1, 10, 100, 1000};
147  map<int, double> timingResults;
148  for(size_t grainSize: grainSizes)
149  {
150  tbb::tick_count t0 = tbb::tick_count::now();
151 
152  // Run parallel code (as a task group) inside of task arena
153  arena.execute([&]{
154  tg.run_and_wait([&]{
155  tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
156  });
157  });
158 
159  tbb::tick_count t1 = tbb::tick_count::now();
160  cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
161  timingResults[(int)grainSize] = (t1 - t0).seconds();
162  }
163 
164  return timingResults;
165 }
166 
167 /* ************************************************************************* */
168 int main(int argc, char* argv[])
169 {
170  cout << "numberOfProblems = " << numberOfProblems << endl;
171  cout << "problemSize = " << problemSize << endl;
172 
173  const vector<int> numThreads = {1, 4, 8};
174  Results results;
175 
176  for(size_t n: numThreads)
177  {
178  cout << "With " << n << " threads:" << endl;
179  results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
180  results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
181  cout << endl;
182  }
183 
184  cout << "Summary of results:" << endl;
185  for(const Results::value_type& threads_result: results)
186  {
187  const int threads = threads_result.first;
188  const ResultWithThreads& result = threads_result.second;
189  if(threads != 1)
190  {
191  for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
192  {
193  const int grainsize = grainsize_time.first;
194  const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
195  cout << threads << " threads, without allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
196  }
197  for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
198  {
199  const int grainsize = grainsize_time.first;
200  const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
201  cout << threads << " threads, with allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
202  }
203  }
204  }
205 
206  return 0;
207 }
208 
209 #else
210 
211 /* ************************************************************************* */
/// Fallback entry point used when GTSAM_USE_TBB is not defined: prints a
/// notice that TBB is required and exits with status 0.
int main(int argc, char* argv [])
{
  static const char* const notice =
      "GTSAM is compiled without TBB, please compile with TBB to use this program.";
  std::cout << notice << std::endl;
  return 0;
}
217 
218 #endif
gtsam.examples.DogLegOptimizerExample.int
int
Definition: DogLegOptimizerExample.py:111
global_includes.h
Included from all GTSAM files.
Matrix.h
typedef and functions to augment Eigen's MatrixXd
m1
Matrix3d m1
Definition: IOFormat.cpp:2
result
Values result
Definition: OdometryOptimize.cpp:8
n
int n
Definition: BiCGSTAB_simple.cpp:1
m2
MatrixType m2(n_dims)
operator()
internal::enable_if< internal::valid_indexed_view_overload< RowIndices, ColIndices >::value &&internal::traits< typename EIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::ReturnAsIndexedView, typename EIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::type operator()(const RowIndices &rowIndices, const ColIndices &colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
Definition: IndexedViewMethods.h:73
Eigen::Map
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:94
main
int main(int argc, char *argv[])
Definition: TimeTBB.cpp:212
gtsam
traits
Definition: SFMdata.h:40
gtsam::DenseIndex
ptrdiff_t DenseIndex
The index type for Eigen objects.
Definition: types.h:103
std
Definition: BFloat16.h:88
results
std::map< std::string, Array< float, 1, 8, DontAlign|RowMajor > > results
Definition: dense_solvers.cpp:10
Eigen::Matrix
The matrix class, also used for vectors and row-vectors.
Definition: 3rdparty/Eigen/Eigen/src/Core/Matrix.h:178
prod
EIGEN_DONT_INLINE void prod(const Lhs &a, const Rhs &b, Res &c)
Definition: product_threshold.cpp:39
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9


gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:07:57