gtsam: TimeTBB.cpp Source File

Go to the documentation of this file.
 /* ----------------------------------------------------------------------------
 
 * GTSAM Copyright 2010, Georgia Tech Research Corporation,
 * Atlanta, Georgia 30332-0415
 * All Rights Reserved
 * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
 
 * See LICENSE for the license information
 * -------------------------------------------------------------------------- */
 
 #include <gtsam/global_includes.h>
 #include <gtsam/base/Matrix.h>
 
 #include <boost/assign/list_of.hpp>
 #include <map>
 #include <iostream>
 
 using namespace std;
 using namespace gtsam;
 using boost::assign::list_of;
 
 #ifdef GTSAM_USE_TBB
 
 #include <tbb/blocked_range.h>           // tbb::blocked_range
 #include <tbb/tick_count.h>              // tbb::tick_count
 #include <tbb/parallel_for.h>            // tbb::parallel_for
 #include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
 #include <tbb/task_arena.h>              // tbb::task_arena
 #include <tbb/task_group.h>              // tbb::task_group
 
 static const DenseIndex numberOfProblems = 1000000;
 static const DenseIndex problemSize = 4;
 
 typedef Eigen::Matrix<double, problemSize, problemSize> FixedMatrix;
 
 /* ************************************************************************* */
 struct ResultWithThreads
 {
   typedef map<int, double>::value_type value_type;
   map<int, double> grainSizesWithoutAllocation;
   map<int, double> grainSizesWithAllocation;
 };
 
 typedef map<int, ResultWithThreads> Results;
 
 /* ************************************************************************* */
 struct WorkerWithoutAllocation
 {
   vector<double>& results;
 
   WorkerWithoutAllocation(vector<double>& results) : results(results) {}
 
   void operator()(const tbb::blocked_range<size_t>& r) const
   {
     for(size_t i = r.begin(); i != r.end(); ++i)
     {
       FixedMatrix m1 = FixedMatrix::Random();
       FixedMatrix m2 = FixedMatrix::Random();
       FixedMatrix prod = m1 * m2;
       results[i] = prod.norm();
     }
   }
 };
 
 /* ************************************************************************* */
 map<int, double> testWithoutMemoryAllocation(int num_threads)
 {
   // A function to do some matrix operations without allocating any memory
 
   // Create task_arena and task_group
   tbb::task_arena arena(num_threads);
   tbb::task_group tg;
 
   // Now call it
   vector<double> results(numberOfProblems);
 
   const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
   map<int, double> timingResults;
   for(size_t grainSize: grainSizes)
   {
     tbb::tick_count t0 = tbb::tick_count::now();
 
     // Run parallel code (as a task group) inside of task arena
     arena.execute([&]{
       tg.run_and_wait([&]{
         tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
       });
     });
 
     tbb::tick_count t1 = tbb::tick_count::now();
     cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
     timingResults[(int)grainSize] = (t1 - t0).seconds();
   }
 
   return timingResults;
 }
 
 /* ************************************************************************* */
 struct WorkerWithAllocation
 {
   vector<double>& results;
 
   WorkerWithAllocation(vector<double>& results) : results(results) {}
 
   void operator()(const tbb::blocked_range<size_t>& r) const
   {
     tbb::cache_aligned_allocator<double> allocator;
     for(size_t i = r.begin(); i != r.end(); ++i)
     {
       double *m1data = allocator.allocate(problemSize * problemSize);
       Eigen::Map<Matrix> m1(m1data, problemSize, problemSize);
       double *m2data = allocator.allocate(problemSize * problemSize);
       Eigen::Map<Matrix> m2(m2data, problemSize, problemSize);
       double *proddata = allocator.allocate(problemSize * problemSize);
       Eigen::Map<Matrix> prod(proddata, problemSize, problemSize);
 
       m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
       m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
       prod = m1 * m2;
       results[i] = prod.norm();
 
       allocator.deallocate(m1data, problemSize * problemSize);
       allocator.deallocate(m2data, problemSize * problemSize);
       allocator.deallocate(proddata, problemSize * problemSize);
     }
   }
 };
 
 /* ************************************************************************* */
 map<int, double> testWithMemoryAllocation(int num_threads)
 {
   // A function to do some matrix operations with allocating memory
 
   // Create task_arena and task_group
   tbb::task_arena arena(num_threads);
   tbb::task_group tg;
 
   // Now call it
   vector<double> results(numberOfProblems);
 
   const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
   map<int, double> timingResults;
   for(size_t grainSize: grainSizes)
   {
     tbb::tick_count t0 = tbb::tick_count::now();
 
     // Run parallel code (as a task group) inside of task arena
     arena.execute([&]{
       tg.run_and_wait([&]{
         tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
       });
     });
 
     tbb::tick_count t1 = tbb::tick_count::now();
     cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
     timingResults[(int)grainSize] = (t1 - t0).seconds();
   }
 
   return timingResults;
 }
 
 /* ************************************************************************* */
 int main(int argc, char* argv[])
 {
   cout << "numberOfProblems = " << numberOfProblems << endl;
   cout << "problemSize = " << problemSize << endl;
 
   const vector<int> numThreads = list_of(1)(4)(8);
   Results results;
 
   for(size_t n: numThreads)
   {
     cout << "With " << n << " threads:" << endl;
     results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
     results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
     cout << endl;
   }
 
   cout << "Summary of results:" << endl;
   for(const Results::value_type& threads_result: results)
   {
     const int threads = threads_result.first;
     const ResultWithThreads& result = threads_result.second;
     if(threads != 1)
     {
       for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
       {
         const int grainsize = grainsize_time.first;
         const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
         cout << threads << " threads, without allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
       }
       for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
       {
         const int grainsize = grainsize_time.first;
         const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
         cout << threads << " threads, with allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
       }
     }
   }
 
   return 0;
 }
 
 #else
 
 /* ************************************************************************* */
 int main(int argc, char* argv [])
 {
   cout << "GTSAM is compiled without TBB, please compile with TBB to use this program." << endl;
   return 0;
 }
 
 #endif