#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

#include <tbb/blocked_range.h>
#include <tbb/tick_count.h>
#include <tbb/parallel_for.h>
#include <tbb/cache_aligned_allocator.h>
#include <tbb/task_arena.h>
#include <tbb/task_group.h>

using namespace gtsam;

static const DenseIndex numberOfProblems = 1000000;
/// Timings gathered for one thread count, keyed by grain size.
struct ResultWithThreads
{
  /// Entry type of the timing maps: (grain size, elapsed seconds).
  using value_type = std::map<int, double>::value_type;

  /// Elapsed seconds per grain size, as returned by testWithoutMemoryAllocation().
  std::map<int, double> grainSizesWithoutAllocation;

  /// Elapsed seconds per grain size, as returned by testWithMemoryAllocation().
  std::map<int, double> grainSizesWithAllocation;
};

/// All timing results, keyed by the number of threads used for the run.
using Results = std::map<int, ResultWithThreads>;
52 struct WorkerWithoutAllocation
56 WorkerWithoutAllocation(vector<double>& results) : results(results) {}
58 void operator()(
const tbb::blocked_range<size_t>& r)
const 60 for(
size_t i = r.begin();
i != r.end(); ++
i)
62 FixedMatrix
m1 = FixedMatrix::Random();
63 FixedMatrix
m2 = FixedMatrix::Random();
64 FixedMatrix
prod = m1 *
m2;
65 results[
i] = prod.norm();
71 map<int, double> testWithoutMemoryAllocation(
int num_threads)
76 tbb::task_arena arena(num_threads);
80 vector<double>
results(numberOfProblems);
82 const vector<size_t> grainSizes = {1, 10, 100, 1000};
83 map<int, double> timingResults;
84 for(
size_t grainSize: grainSizes)
86 tbb::tick_count t0 = tbb::tick_count::now();
91 tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
95 tbb::tick_count t1 = tbb::tick_count::now();
96 cout <<
"Without memory allocation, grain size = " << grainSize <<
", time = " << (t1 - t0).seconds() << endl;
97 timingResults[(
int)grainSize] = (t1 - t0).seconds();
100 return timingResults;
104 struct WorkerWithAllocation
108 WorkerWithAllocation(vector<double>& results) : results(results) {}
110 void operator()(
const tbb::blocked_range<size_t>& r)
const 112 tbb::cache_aligned_allocator<double> allocator;
113 for(
size_t i = r.begin();
i != r.end(); ++
i)
115 double *m1data = allocator.allocate(problemSize * problemSize);
117 double *m2data = allocator.allocate(problemSize * problemSize);
119 double *proddata = allocator.allocate(problemSize * problemSize);
122 m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
123 m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
125 results[
i] =
prod.norm();
127 allocator.deallocate(m1data, problemSize * problemSize);
128 allocator.deallocate(m2data, problemSize * problemSize);
129 allocator.deallocate(proddata, problemSize * problemSize);
135 map<int, double> testWithMemoryAllocation(
int num_threads)
140 tbb::task_arena arena(num_threads);
144 vector<double>
results(numberOfProblems);
146 const vector<size_t> grainSizes = {1, 10, 100, 1000};
147 map<int, double> timingResults;
148 for(
size_t grainSize: grainSizes)
150 tbb::tick_count t0 = tbb::tick_count::now();
155 tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
159 tbb::tick_count t1 = tbb::tick_count::now();
160 cout <<
"With memory allocation, grain size = " << grainSize <<
", time = " << (t1 - t0).seconds() << endl;
161 timingResults[(
int)grainSize] = (t1 - t0).seconds();
164 return timingResults;
168 int main(
int argc,
char* argv[])
170 cout <<
"numberOfProblems = " << numberOfProblems << endl;
171 cout <<
"problemSize = " << problemSize << endl;
173 const vector<int> numThreads = {1, 4, 8};
176 for(
size_t n: numThreads)
178 cout <<
"With " <<
n <<
" threads:" << endl;
179 results[(
int)
n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((
int)
n);
180 results[(
int)
n].grainSizesWithAllocation = testWithMemoryAllocation((
int)
n);
184 cout <<
"Summary of results:" << endl;
185 for(
const Results::value_type& threads_result: results)
187 const int threads = threads_result.first;
188 const ResultWithThreads&
result = threads_result.second;
191 for(
const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
193 const int grainsize = grainsize_time.first;
194 const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
195 cout << threads <<
" threads, without allocation, grain size = " << grainsize <<
", speedup = " << speedup << endl;
197 for(
const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
199 const int grainsize = grainsize_time.first;
200 const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
201 cout << threads <<
" threads, with allocation, grain size = " << grainsize <<
", speedup = " << speedup << endl;
/// Fallback entry point: only reports that the program needs a TBB-enabled build.
// NOTE(review): presumably the alternative branch of a build-time TBB
// conditional that is not visible here - confirm it never coexists with the
// benchmark main in one translation unit.
int main(int argc, char* argv [])
{
  std::cout << "GTSAM is compiled without TBB, please compile with TBB to use this program." << std::endl;
  return 0;
}
A matrix or vector expression mapping an existing array of data.
int main(int argc, char *argv [])
ptrdiff_t DenseIndex
The index type for Eigen objects.
Included from all GTSAM files.
EIGEN_DONT_INLINE void prod(const Lhs &a, const Rhs &b, Res &c)
std::map< std::string, Array< float, 1, 8, DontAlign|RowMajor > > results
The matrix class, also used for vectors and row-vectors.
internal::enable_if< internal::valid_indexed_view_overload< RowIndices, ColIndices >::value &&internal::traits< typename EIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::ReturnAsIndexedView, typename EIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::type operator()(const RowIndices &rowIndices, const ColIndices &colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST