21 #include <boost/assign/list_of.hpp> 26 using namespace gtsam;
27 using boost::assign::list_of;
31 #include <tbb/blocked_range.h> 32 #include <tbb/tick_count.h> 33 #include <tbb/parallel_for.h> 34 #include <tbb/cache_aligned_allocator.h> 35 #include <tbb/task_arena.h> 36 #include <tbb/task_group.h> 38 static const DenseIndex numberOfProblems = 1000000;
/**
 * Timings collected for one thread count.
 *
 * Both maps are keyed by grain size and store the elapsed time in seconds of
 * the corresponding run.
 */
struct ResultWithThreads
{
  /// (grain size, seconds) entry type shared by both maps.
  typedef std::map<int, double>::value_type value_type;

  /// Timings of the workload that performs no heap allocation.
  std::map<int, double> grainSizesWithoutAllocation;

  /// Timings of the workload that allocates its matrices in every iteration.
  std::map<int, double> grainSizesWithAllocation;
};
51 typedef map<int, ResultWithThreads> Results;
54 struct WorkerWithoutAllocation
58 WorkerWithoutAllocation(vector<double>& results) : results(results) {}
60 void operator()(
const tbb::blocked_range<size_t>& r)
const 62 for(
size_t i = r.begin();
i != r.end(); ++
i)
64 FixedMatrix
m1 = FixedMatrix::Random();
65 FixedMatrix
m2 = FixedMatrix::Random();
66 FixedMatrix
prod = m1 *
m2;
67 results[
i] = prod.norm();
73 map<int, double> testWithoutMemoryAllocation(
int num_threads)
78 tbb::task_arena arena(num_threads);
82 vector<double>
results(numberOfProblems);
84 const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
85 map<int, double> timingResults;
86 for(
size_t grainSize: grainSizes)
88 tbb::tick_count t0 = tbb::tick_count::now();
93 tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
97 tbb::tick_count t1 = tbb::tick_count::now();
98 cout <<
"Without memory allocation, grain size = " << grainSize <<
", time = " << (t1 - t0).seconds() << endl;
99 timingResults[(
int)grainSize] = (t1 - t0).seconds();
102 return timingResults;
106 struct WorkerWithAllocation
110 WorkerWithAllocation(vector<double>& results) : results(results) {}
112 void operator()(
const tbb::blocked_range<size_t>& r)
const 114 tbb::cache_aligned_allocator<double> allocator;
115 for(
size_t i = r.begin();
i != r.end(); ++
i)
117 double *m1data = allocator.allocate(problemSize * problemSize);
119 double *m2data = allocator.allocate(problemSize * problemSize);
121 double *proddata = allocator.allocate(problemSize * problemSize);
124 m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
125 m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
127 results[
i] =
prod.norm();
129 allocator.deallocate(m1data, problemSize * problemSize);
130 allocator.deallocate(m2data, problemSize * problemSize);
131 allocator.deallocate(proddata, problemSize * problemSize);
137 map<int, double> testWithMemoryAllocation(
int num_threads)
142 tbb::task_arena arena(num_threads);
146 vector<double>
results(numberOfProblems);
148 const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
149 map<int, double> timingResults;
150 for(
size_t grainSize: grainSizes)
152 tbb::tick_count t0 = tbb::tick_count::now();
157 tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
161 tbb::tick_count t1 = tbb::tick_count::now();
162 cout <<
"With memory allocation, grain size = " << grainSize <<
", time = " << (t1 - t0).seconds() << endl;
163 timingResults[(
int)grainSize] = (t1 - t0).seconds();
166 return timingResults;
170 int main(
int argc,
char* argv[])
172 cout <<
"numberOfProblems = " << numberOfProblems << endl;
173 cout <<
"problemSize = " << problemSize << endl;
175 const vector<int> numThreads = list_of(1)(4)(8);
178 for(
size_t n: numThreads)
180 cout <<
"With " <<
n <<
" threads:" << endl;
181 results[(
int)
n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((
int)
n);
182 results[(
int)n].grainSizesWithAllocation = testWithMemoryAllocation((
int)n);
186 cout << "Summary of results:" << endl;
187 for(const Results::value_type& threads_result: results)
189 const int threads = threads_result.first;
190 const ResultWithThreads&
result = threads_result.second;
193 for(
const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
195 const int grainsize = grainsize_time.first;
196 const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
197 cout << threads <<
" threads, without allocation, grain size = " << grainsize <<
", speedup = " << speedup << endl;
199 for(
const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
201 const int grainsize = grainsize_time.first;
202 const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
203 cout << threads <<
" threads, with allocation, grain size = " << grainsize <<
", speedup = " << speedup << endl;
/**
 * Fallback entry point: only reports that the benchmark is unavailable.
 *
 * @param argc unused
 * @param argv unused
 * @return 0
 */
int main(int argc, char* argv[])
{
  std::cout << "GTSAM is compiled without TBB, please compile with TBB to use this program."
            << std::endl;
  return 0;
}
for(size_t i=1;i< poses.size();++i)
A matrix or vector expression mapping an existing array of data.
ptrdiff_t DenseIndex
The index type for Eigen objects.
Included from all GTSAM files.
EIGEN_DONT_INLINE void prod(const Lhs &a, const Rhs &b, Res &c)
std::map< std::string, Array< float, 1, 8, DontAlign|RowMajor > > results
The matrix class, also used for vectors and row-vectors.
int main(int argc, char *argv[])