Parallelizer.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_PARALLELIZER_H
00011 #define EIGEN_PARALLELIZER_H
00012 
00013 namespace Eigen { 
00014 
00015 namespace internal {
00016 
00018 inline void manage_multi_threading(Action action, int* v)
00019 {
00020   static EIGEN_UNUSED int m_maxThreads = -1;
00021 
00022   if(action==SetAction)
00023   {
00024     eigen_internal_assert(v!=0);
00025     m_maxThreads = *v;
00026   }
00027   else if(action==GetAction)
00028   {
00029     eigen_internal_assert(v!=0);
00030     #ifdef EIGEN_HAS_OPENMP
00031     if(m_maxThreads>0)
00032       *v = m_maxThreads;
00033     else
00034       *v = omp_get_max_threads();
00035     #else
00036     *v = 1;
00037     #endif
00038   }
00039   else
00040   {
00041     eigen_internal_assert(false);
00042   }
00043 }
00044 
00045 }
00046 
00048 inline void initParallel()
00049 {
00050   int nbt;
00051   internal::manage_multi_threading(GetAction, &nbt);
00052   std::ptrdiff_t l1, l2;
00053   internal::manage_caching_sizes(GetAction, &l1, &l2);
00054 }
00055 
00058 inline int nbThreads()
00059 {
00060   int ret;
00061   internal::manage_multi_threading(GetAction, &ret);
00062   return ret;
00063 }
00064 
00067 inline void setNbThreads(int v)
00068 {
00069   internal::manage_multi_threading(SetAction, &v);
00070 }
00071 
00072 namespace internal {
00073 
/** \internal
  * Per-thread record shared between parallelize_gemm() and the functor it
  * drives; parallelize_gemm() allocates one element per requested thread.
  */
template<typename Index>
struct GemmParallelInfo
{
  // Not written by parallelize_gemm() after construction; their use is left
  // to the functor that receives the array.
  int volatile sync;
  int volatile users;

  // Column slice assigned to the owning thread (start offset and length).
  Index rhs_start;
  Index rhs_length;

  /** Initial state: sync is -1, everything else is 0. */
  GemmParallelInfo()
    : sync(-1),
      users(0),
      rhs_start(0),
      rhs_length(0)
  {}
};
00084 
00085 template<bool Condition, typename Functor, typename Index>
00086 void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
00087 {
00088   // TODO when EIGEN_USE_BLAS is defined,
00089   // we should still enable OMP for other scalar types
00090 #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
00091   // FIXME the transpose variable is only needed to properly split
00092   // the matrix product when multithreading is enabled. This is a temporary
00093   // fix to support row-major destination matrices. This whole
00094   // parallelizer mechanism has to be redisigned anyway.
00095   EIGEN_UNUSED_VARIABLE(transpose);
00096   func(0,rows, 0,cols);
00097 #else
00098 
00099   // Dynamically check whether we should enable or disable OpenMP.
00100   // The conditions are:
00101   // - the max number of threads we can create is greater than 1
00102   // - we are not already in a parallel code
00103   // - the sizes are large enough
00104 
00105   // 1- are we already in a parallel session?
00106   // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
00107   if((!Condition) || (omp_get_num_threads()>1))
00108     return func(0,rows, 0,cols);
00109 
00110   Index size = transpose ? cols : rows;
00111 
00112   // 2- compute the maximal number of threads from the size of the product:
00113   // FIXME this has to be fine tuned
00114   Index max_threads = std::max<Index>(1,size / 32);
00115 
00116   // 3 - compute the number of threads we are going to use
00117   Index threads = std::min<Index>(nbThreads(), max_threads);
00118 
00119   if(threads==1)
00120     return func(0,rows, 0,cols);
00121 
00122   Eigen::initParallel();
00123   func.initParallelSession();
00124 
00125   if(transpose)
00126     std::swap(rows,cols);
00127 
00128   GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
00129 
00130   #pragma omp parallel num_threads(threads)
00131   {
00132     Index i = omp_get_thread_num();
00133     // Note that the actual number of threads might be lower than the number of request ones.
00134     Index actual_threads = omp_get_num_threads();
00135     
00136     Index blockCols = (cols / actual_threads) & ~Index(0x3);
00137     Index blockRows = (rows / actual_threads) & ~Index(0x7);
00138     
00139     Index r0 = i*blockRows;
00140     Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
00141 
00142     Index c0 = i*blockCols;
00143     Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
00144 
00145     info[i].rhs_start = c0;
00146     info[i].rhs_length = actualBlockCols;
00147 
00148     if(transpose)
00149       func(0, cols, r0, actualBlockRows, info);
00150     else
00151       func(r0, actualBlockRows, 0,cols, info);
00152   }
00153 
00154   delete[] info;
00155 #endif
00156 }
00157 
00158 } // end namespace internal
00159 
00160 } // end namespace Eigen
00161 
00162 #endif // EIGEN_PARALLELIZER_H


turtlebot_exploration_3d
Author(s): Bona, Shawn
autogenerated on Thu Jun 6 2019 20:59:07