TimeTBB.cpp
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------------
2 
3 * GTSAM Copyright 2010, Georgia Tech Research Corporation,
4 * Atlanta, Georgia 30332-0415
5 * All Rights Reserved
6 * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
7 
8 * See LICENSE for the license information
9 * -------------------------------------------------------------------------- */
10 
18 #include <gtsam/global_includes.h>
19 #include <gtsam/base/Matrix.h>
20 
21 #include <boost/assign/list_of.hpp>
22 #include <map>
23 #include <iostream>
24 
25 using namespace std;
26 using namespace gtsam;
27 using boost::assign::list_of;
28 
29 #ifdef GTSAM_USE_TBB
30 
31 #include <tbb/blocked_range.h> // tbb::blocked_range
32 #include <tbb/tick_count.h> // tbb::tick_count
33 #include <tbb/parallel_for.h> // tbb::parallel_for
34 #include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
35 #include <tbb/task_arena.h> // tbb::task_arena
36 #include <tbb/task_group.h> // tbb::task_group
37 
38 static const DenseIndex numberOfProblems = 1000000;
39 static const DenseIndex problemSize = 4;
40 
42 
43 /* ************************************************************************* */
/// Timings collected for a single thread count, keyed by grain size.
struct ResultWithThreads
{
  /// (grain size, time) entry type shared by both maps below.
  using value_type = map<int, double>::value_type;

  /// Grain size -> time reported by testWithoutMemoryAllocation().
  map<int, double> grainSizesWithoutAllocation;

  /// Grain size -> time reported by testWithMemoryAllocation().
  map<int, double> grainSizesWithAllocation;
};
50 
51 typedef map<int, ResultWithThreads> Results;
52 
53 /* ************************************************************************* */
54 struct WorkerWithoutAllocation
55 {
56  vector<double>& results;
57 
58  WorkerWithoutAllocation(vector<double>& results) : results(results) {}
59 
60  void operator()(const tbb::blocked_range<size_t>& r) const
61  {
62  for(size_t i = r.begin(); i != r.end(); ++i)
63  {
64  FixedMatrix m1 = FixedMatrix::Random();
65  FixedMatrix m2 = FixedMatrix::Random();
66  FixedMatrix prod = m1 * m2;
67  results[i] = prod.norm();
68  }
69  }
70 };
71 
72 /* ************************************************************************* */
73 map<int, double> testWithoutMemoryAllocation(int num_threads)
74 {
75  // A function to do some matrix operations without allocating any memory
76 
77  // Create task_arena and task_group
78  tbb::task_arena arena(num_threads);
79  tbb::task_group tg;
80 
81  // Now call it
82  vector<double> results(numberOfProblems);
83 
84  const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
85  map<int, double> timingResults;
86  for(size_t grainSize: grainSizes)
87  {
88  tbb::tick_count t0 = tbb::tick_count::now();
89 
90  // Run parallel code (as a task group) inside of task arena
91  arena.execute([&]{
92  tg.run_and_wait([&]{
93  tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
94  });
95  });
96 
97  tbb::tick_count t1 = tbb::tick_count::now();
98  cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
99  timingResults[(int)grainSize] = (t1 - t0).seconds();
100  }
101 
102  return timingResults;
103 }
104 
105 /* ************************************************************************* */
106 struct WorkerWithAllocation
107 {
108  vector<double>& results;
109 
110  WorkerWithAllocation(vector<double>& results) : results(results) {}
111 
112  void operator()(const tbb::blocked_range<size_t>& r) const
113  {
114  tbb::cache_aligned_allocator<double> allocator;
115  for(size_t i = r.begin(); i != r.end(); ++i)
116  {
117  double *m1data = allocator.allocate(problemSize * problemSize);
118  Eigen::Map<Matrix> m1(m1data, problemSize, problemSize);
119  double *m2data = allocator.allocate(problemSize * problemSize);
120  Eigen::Map<Matrix> m2(m2data, problemSize, problemSize);
121  double *proddata = allocator.allocate(problemSize * problemSize);
122  Eigen::Map<Matrix> prod(proddata, problemSize, problemSize);
123 
124  m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
125  m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
126  prod = m1 * m2;
127  results[i] = prod.norm();
128 
129  allocator.deallocate(m1data, problemSize * problemSize);
130  allocator.deallocate(m2data, problemSize * problemSize);
131  allocator.deallocate(proddata, problemSize * problemSize);
132  }
133  }
134 };
135 
136 /* ************************************************************************* */
137 map<int, double> testWithMemoryAllocation(int num_threads)
138 {
139  // A function to do some matrix operations with allocating memory
140 
141  // Create task_arena and task_group
142  tbb::task_arena arena(num_threads);
143  tbb::task_group tg;
144 
145  // Now call it
146  vector<double> results(numberOfProblems);
147 
148  const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
149  map<int, double> timingResults;
150  for(size_t grainSize: grainSizes)
151  {
152  tbb::tick_count t0 = tbb::tick_count::now();
153 
154  // Run parallel code (as a task group) inside of task arena
155  arena.execute([&]{
156  tg.run_and_wait([&]{
157  tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
158  });
159  });
160 
161  tbb::tick_count t1 = tbb::tick_count::now();
162  cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
163  timingResults[(int)grainSize] = (t1 - t0).seconds();
164  }
165 
166  return timingResults;
167 }
168 
169 /* ************************************************************************* */
170 int main(int argc, char* argv[])
171 {
172  cout << "numberOfProblems = " << numberOfProblems << endl;
173  cout << "problemSize = " << problemSize << endl;
174 
175  const vector<int> numThreads = list_of(1)(4)(8);
176  Results results;
177 
178  for(size_t n: numThreads)
179  {
180  cout << "With " << n << " threads:" << endl;
181  results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
182  results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
183  cout << endl;
184  }
185 
186  cout << "Summary of results:" << endl;
187  for(const Results::value_type& threads_result: results)
188  {
189  const int threads = threads_result.first;
190  const ResultWithThreads& result = threads_result.second;
191  if(threads != 1)
192  {
193  for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
194  {
195  const int grainsize = grainsize_time.first;
196  const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
197  cout << threads << " threads, without allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
198  }
199  for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
200  {
201  const int grainsize = grainsize_time.first;
202  const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
203  cout << threads << " threads, with allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
204  }
205  }
206  }
207 
208  return 0;
209 }
210 
211 #else
212 
213 /* ************************************************************************* */
/// Fallback entry point for builds without TBB: prints a notice and exits successfully.
int main(int argc, char* argv [])
{
  static const char* const notice =
      "GTSAM is compiled without TBB, please compile with TBB to use this program.";
  cout << notice << endl;
  return 0;
}
219 
220 #endif
return int(ret)+1
for(size_t i=1;i< poses.size();++i)
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:94
MatrixType m2(n_dims)
int n
Definition: Half.h:150
ptrdiff_t DenseIndex
The index type for Eigen objects.
Definition: types.h:67
Included from all GTSAM files.
Values result
Matrix3d m1
Definition: IOFormat.cpp:2
EIGEN_DONT_INLINE void prod(const Lhs &a, const Rhs &b, Res &c)
traits
Definition: chartTesting.h:28
std::map< std::string, Array< float, 1, 8, DontAlign|RowMajor > > results
The matrix class, also used for vectors and row-vectors.
int main(int argc, char *argv[])
Definition: TimeTBB.cpp:214


gtsam
Author(s):
autogenerated on Sat May 8 2021 02:51:04