25 using namespace gtsam;
29 #include <tbb/blocked_range.h>
30 #include <tbb/tick_count.h>
31 #include <tbb/parallel_for.h>
32 #include <tbb/cache_aligned_allocator.h>
33 #include <tbb/task_arena.h>
34 #include <tbb/task_group.h>
36 static const DenseIndex numberOfProblems = 1000000;
42 struct ResultWithThreads
44 typedef map<int, double>::value_type value_type;
45 map<int, double> grainSizesWithoutAllocation;
46 map<int, double> grainSizesWithAllocation;
49 typedef map<int, ResultWithThreads> Results;
52 struct WorkerWithoutAllocation
58 void operator()(
const tbb::blocked_range<size_t>& r)
const
60 for(
size_t i = r.begin();
i != r.end(); ++
i)
62 FixedMatrix
m1 = FixedMatrix::Random();
63 FixedMatrix
m2 = FixedMatrix::Random();
71 map<int, double> testWithoutMemoryAllocation(
int num_threads)
76 tbb::task_arena arena(num_threads);
80 vector<double>
results(numberOfProblems);
82 const vector<size_t> grainSizes = {1, 10, 100, 1000};
83 map<int, double> timingResults;
84 for(
size_t grainSize: grainSizes)
86 tbb::tick_count t0 = tbb::tick_count::now();
91 tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(
results));
95 tbb::tick_count t1 = tbb::tick_count::now();
96 cout <<
"Without memory allocation, grain size = " << grainSize <<
", time = " << (t1 - t0).seconds() << endl;
97 timingResults[(
int)grainSize] = (t1 - t0).seconds();
100 return timingResults;
104 struct WorkerWithAllocation
110 void operator()(
const tbb::blocked_range<size_t>& r)
const
112 tbb::cache_aligned_allocator<double> allocator;
113 for(
size_t i = r.begin();
i != r.end(); ++
i)
115 double *m1data = allocator.allocate(problemSize * problemSize);
117 double *m2data = allocator.allocate(problemSize * problemSize);
119 double *proddata = allocator.allocate(problemSize * problemSize);
122 m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
123 m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
127 allocator.deallocate(m1data, problemSize * problemSize);
128 allocator.deallocate(m2data, problemSize * problemSize);
129 allocator.deallocate(proddata, problemSize * problemSize);
135 map<int, double> testWithMemoryAllocation(
int num_threads)
140 tbb::task_arena arena(num_threads);
144 vector<double>
results(numberOfProblems);
146 const vector<size_t> grainSizes = {1, 10, 100, 1000};
147 map<int, double> timingResults;
148 for(
size_t grainSize: grainSizes)
150 tbb::tick_count t0 = tbb::tick_count::now();
155 tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(
results));
159 tbb::tick_count t1 = tbb::tick_count::now();
160 cout <<
"With memory allocation, grain size = " << grainSize <<
", time = " << (t1 - t0).seconds() << endl;
161 timingResults[(
int)grainSize] = (t1 - t0).seconds();
164 return timingResults;
168 int main(
int argc,
char* argv[])
170 cout <<
"numberOfProblems = " << numberOfProblems << endl;
171 cout <<
"problemSize = " << problemSize << endl;
173 const vector<int> numThreads = {1, 4, 8};
176 for(
size_t n: numThreads)
178 cout <<
"With " <<
n <<
" threads:" << endl;
179 results[(
int)
n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((
int)
n);
180 results[(
int)
n].grainSizesWithAllocation = testWithMemoryAllocation((
int)
n);
184 cout <<
"Summary of results:" << endl;
185 for(
const Results::value_type& threads_result:
results)
187 const int threads = threads_result.first;
188 const ResultWithThreads&
result = threads_result.second;
191 for(
const ResultWithThreads::value_type& grainsize_time:
result.grainSizesWithoutAllocation)
193 const int grainsize = grainsize_time.first;
194 const double speedup =
results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
195 cout << threads <<
" threads, without allocation, grain size = " << grainsize <<
", speedup = " << speedup << endl;
197 for(
const ResultWithThreads::value_type& grainsize_time:
result.grainSizesWithAllocation)
199 const int grainsize = grainsize_time.first;
200 const double speedup =
results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
201 cout << threads <<
" threads, with allocation, grain size = " << grainsize <<
", speedup = " << speedup << endl;
212 int main(
int argc,
char* argv [])
214 cout <<
"GTSAM is compiled without TBB, please compile with TBB to use this program." << endl;