10 #ifndef EIGEN_PARALLELIZER_H 11 #define EIGEN_PARALLELIZER_H 30 #ifdef EIGEN_HAS_OPENMP 34 *v = omp_get_max_threads();
52 std::ptrdiff_t l1, l2, l3;
// Visible portion of a templated routine that invokes `func` either once over
// the full (rows x cols) range or once per OpenMP thread on a slice of it.
// NOTE(review): this is an excerpt of a numbered listing -- the leading numbers
// jump (e.g. 114 -> 122, 126 -> 133), so the signature, the braces and the
// declarations of `threads` and `info` are not visible here.
//
// Template parameters:
//   Condition - compile-time switch; when false the single-call path below is taken.
//   Functor   - callable type; must expose Traits::mr and initParallelSession().
//   Index     - integer type used for all sizes and offsets.
85 template<
bool Condition,
typename Functor,
typename Index>
// NOTE(review): the code guarded by this #if is not visible in the excerpt; the
// OpenMP calls below presumably belong to the opposite branch -- confirm.
// `size` is the dimension that gets split: rows when `transpose` is set, cols otherwise.
90 #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS) 108 Index size = transpose ? rows : cols;
// First cap on the thread count: one thread per 32 units of `size`, at least 1.
109 Index pb_max_threads = std::max<Index>(1,size / 32);
// Second cap, from the total amount of work rows*cols*depth, computed in double.
111 double work =
static_cast<double>(rows) * static_cast<double>(cols) *
112 static_cast<double>(depth);
// Amount of work below which an additional thread is not granted.
113 double kMinTaskSize = 50000;
// Keep the smaller of the two caps, but never less than 1.
// NOTE(review): `work / kMinTaskSize` is a double implicitly converted to Index
// by std::min<Index>; for a narrow Index and very large products the quotient
// may not be representable -- consider an explicit, range-checked cast.
114 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
// Single-call path: taken when Condition is false, when only one thread would
// be used, or when omp_get_num_threads() already reports more than one thread.
122 if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
123 return func(0,rows, 0,cols);
// Tell the functor how many threads were requested before the parallel region starts.
126 func.initParallelSession(threads);
// Parallel region: `i` is the id of the executing thread.
133 #pragma omp parallel num_threads(threads) 135 Index i = omp_get_thread_num();
// Use the team size reported inside the region rather than the requested `threads`.
137 Index actual_threads = omp_get_num_threads();
// Columns per thread, rounded down to a multiple of 4 (low two bits cleared).
139 Index blockCols = (cols / actual_threads) & ~
Index(0x3);
// Rows per thread, rounded down to a multiple of Functor::Traits::mr.
140 Index blockRows = (rows / actual_threads);
141 blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
// Row range of thread i; the last thread also takes the rows lost to rounding.
143 Index r0 = i*blockRows;
144 Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
// Column range of thread i; the last thread also takes the columns lost to rounding.
146 Index c0 = i*blockCols;
147 Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
// Record this thread's row range in its slot of `info`.
149 info[i].lhs_start = r0;
150 info[i].lhs_length = actualBlockRows;
// Run the functor on this thread's column slice; when `transpose` is set the
// slice is passed as the first range and the full `rows` extent as the second.
152 if(transpose) func(c0, actualBlockCols, 0, rows, info);
153 else func(0, rows, c0, actualBlockCols, info);
162 #endif // EIGEN_PARALLELIZER_H
void manage_caching_sizes(Action action, std::ptrdiff_t *l1, std::ptrdiff_t *l2, std::ptrdiff_t *l3)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
#define eigen_internal_assert(x)
void parallelize_gemm(const Functor &func, Index rows, Index cols, Index depth, bool transpose)
void manage_multi_threading(Action action, int *v)
#define EIGEN_UNUSED_VARIABLE(var)
void swap(scoped_array< T > &a, scoped_array< T > &b)