10 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H 11 #define EIGEN_GENERAL_MATRIX_MATRIX_H 22 typename LhsScalar,
int LhsStorageOrder,
bool ConjugateLhs,
23 typename RhsScalar,
int RhsStorageOrder,
bool ConjugateRhs,
32 const LhsScalar* lhs,
Index lhsStride,
33 const RhsScalar* rhs,
Index rhsStride,
44 ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,
info);
52 typename LhsScalar,
int LhsStorageOrder,
bool ConjugateLhs,
53 typename RhsScalar,
int RhsStorageOrder,
bool ConjugateRhs,
62 const LhsScalar* _lhs,
Index lhsStride,
63 const RhsScalar* _rhs,
Index rhsStride,
64 ResScalar* _res,
Index resIncr,
Index resStride,
72 LhsMapper lhs(_lhs, lhsStride);
73 RhsMapper rhs(_rhs, rhsStride);
74 ResMapper
res(_res, resStride, resIncr);
84 #ifdef EIGEN_HAS_OPENMP 91 LhsScalar* blockA = blocking.
blockA();
104 pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc,
nc);
112 while(
info[tid].users!=0) {}
113 info[tid].users = threads;
115 pack_lhs(blockA+
info[tid].lhs_start*actual_kc, lhs.getSubMapper(
info[tid].lhs_start,k), actual_kc,
info[tid].lhs_length);
121 for(
int shift=0; shift<threads; ++shift)
123 int i = (tid+shift)%threads;
129 while(
info[i].sync!=k) {
133 gebp(res.getSubMapper(
info[i].lhs_start, 0), blockA+
info[
i].lhs_start*actual_kc, blockB,
info[
i].lhs_length, actual_kc,
nc,
alpha);
142 pack_rhs(blockB, rhs.getSubMapper(k,
j), actual_kc, actual_nc);
145 gebp(res.getSubMapper(0,
j), blockA, blockB,
rows, actual_kc, actual_nc,
alpha);
158 #endif // EIGEN_HAS_OPENMP 169 const bool pack_rhs_once = mc!=rows && kc==depth && nc==
cols;
184 pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
194 if((!pack_rhs_once) || i2==0)
195 pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
198 gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc,
alpha);
212 template<
typename Scalar,
typename Index,
typename Gemm,
typename Lhs,
typename Rhs,
typename Dest,
typename BlockingType>
216 : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
221 m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
222 m_blocking.allocateA();
231 &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
232 &m_rhs.coeffRef(0,
col), m_rhs.outerStride(),
233 (
Scalar*)&(m_dest.coeffRef(row,
col)), m_dest.innerStride(), m_dest.outerStride(),
234 m_actualAlpha, m_blocking,
info);
247 template<
int StorageOrder,
typename LhsScalar,
typename RhsScalar,
int MaxRows,
int MaxCols,
int MaxDepth,
int KcFactor=1,
250 template<
typename _LhsScalar,
typename _RhsScalar>
267 : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
274 inline LhsScalar*
blockA() {
return m_blockA; }
275 inline RhsScalar*
blockB() {
return m_blockB; }
278 template<
int StorageOrder,
typename _LhsScalar,
typename _RhsScalar,
int MaxRows,
int MaxCols,
int MaxDepth,
int KcFactor>
281 typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
282 typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
293 SizeA = ActualRows * MaxDepth,
294 SizeB = ActualCols * MaxDepth
297 #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES 309 this->m_mc = ActualRows;
310 this->m_nc = ActualCols;
311 this->m_kc = MaxDepth;
312 #if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES 313 this->m_blockA = m_staticA;
314 this->m_blockB = m_staticB;
329 template<
int StorageOrder,
typename _LhsScalar,
typename _RhsScalar,
int MaxRows,
int MaxCols,
int MaxDepth,
int KcFactor>
332 typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
333 typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
355 computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
360 computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc,
n, num_threads);
363 m_sizeA = this->m_mc * this->m_kc;
364 m_sizeB = this->m_kc * this->m_nc;
375 computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc,
m, this->m_nc, num_threads);
376 m_sizeA = this->m_mc * this->m_kc;
377 m_sizeB = this->m_kc * this->m_nc;
382 if(this->m_blockA==0)
383 this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
388 if(this->m_blockB==0)
389 this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
409 template<
typename Lhs,
typename Rhs>
431 template<
typename Dst>
445 scaleAndAddTo(dst, lhs, rhs,
Scalar(1));
449 template<
typename Dst>
455 scaleAndAddTo(dst,lhs, rhs,
Scalar(1));
458 template<
typename Dst>
464 scaleAndAddTo(dst, lhs, rhs,
Scalar(-1));
467 template<
typename Dest>
470 eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
471 if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
481 else if (dst.rows() == 1)
495 Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
501 LhsScalar, (ActualLhsTypeCleaned::Flags&
RowMajorBit) ?
RowMajor : ColMajor,
bool(LhsBlasTraits::NeedToConjugate),
502 RhsScalar, (ActualRhsTypeCleaned::Flags&
RowMajorBit) ?
RowMajor : ColMajor,
bool(RhsBlasTraits::NeedToConjugate),
504 Dest::InnerStrideAtCompileTime>,
505 ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
507 BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1,
true);
508 internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==
Dynamic)>
509 (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&
RowMajorBit);
517 #endif // EIGEN_GENERAL_MATRIX_MATRIX_H
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
Block< Derived, 1, internal::traits< Derived >::ColsAtCompileTime, IsRowMajor > RowXpr
gebp_traits< LhsScalar, RhsScalar > Traits
gemm_blocking_space(Index, Index, Index, Index, bool)
#define EIGEN_STRONG_INLINE
gebp_traits< LhsScalar, RhsScalar > Traits
internal::remove_all< ActualRhsType >::type ActualRhsTypeCleaned
void initParallelSession(Index num_threads) const
gebp_traits< RhsScalar, LhsScalar > Traits
Expression of the product of two arbitrary matrices or vectors.
RhsBlasTraits::DirectLinearAccessType ActualRhsType
Expression of the transpose of a matrix.
EIGEN_DEVICE_FUNC void aligned_delete(T *ptr, std::size_t size)
conditional< Transpose, _LhsScalar, _RhsScalar >::type RhsScalar
internal::remove_all< ActualLhsType >::type ActualLhsTypeCleaned
Namespace containing all symbols from the Eigen library.
Block< Derived, internal::traits< Derived >::RowsAtCompileTime, 1, !IsRowMajor > ColXpr
ScalarBinaryOpTraits< LhsScalar, RhsScalar >::ReturnType ResScalar
static void subTo(Dst &dst, const Lhs &lhs, const Rhs &rhs)
void initParallel(Index, Index, Index, Index)
const unsigned int RowMajorBit
gemm_functor(const Lhs &lhs, const Rhs &rhs, Dest &dest, const Scalar &actualAlpha, BlockingType &blocking)
#define EIGEN_SIZE_MIN_PREFER_FIXED(a, b)
ScalarBinaryOpTraits< LhsScalar, RhsScalar >::ReturnType ResScalar
int omp_get_num_threads(void)
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const LhsScalar *lhs, Index lhsStride, const RhsScalar *rhs, Index rhsStride, ResScalar *res, Index resIncr, Index resStride, ResScalar alpha, level3_blocking< RhsScalar, LhsScalar > &blocking, GemmParallelInfo< Index > *info=0)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
conditional< Transpose, _RhsScalar, _LhsScalar >::type LhsScalar
internal::blas_traits< Rhs > RhsBlasTraits
conditional< Transpose, _RhsScalar, _LhsScalar >::type LhsScalar
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
conditional< Transpose, _LhsScalar, _RhsScalar >::type RhsScalar
gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar &alpha, const Lhs &lhs, const Rhs &rhs)
internal::blas_traits< Lhs > LhsBlasTraits
static void run(Index rows, Index cols, Index depth, const LhsScalar *_lhs, Index lhsStride, const RhsScalar *_rhs, Index rhsStride, ResScalar *_res, Index resIncr, Index resStride, ResScalar alpha, level3_blocking< LhsScalar, RhsScalar > &blocking, GemmParallelInfo< Index > *info=0)
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
static void evalTo(Dst &dst, const Lhs &lhs, const Rhs &rhs)
LhsBlasTraits::DirectLinearAccessType ActualLhsType
static void addTo(Dst &dst, const Lhs &lhs, const Rhs &rhs)
#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
Determines whether the given binary operation of two numeric types is allowed and what the scalar ret...
BlockingType & m_blocking
static void scaleAndAddTo(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar &alpha)
#define eigen_internal_assert(x)
Generic expression where a coefficient-wise unary operator is applied to an expression.
generic_product_impl< Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode > lazyproduct
internal::enable_if< internal::valid_indexed_view_overload< RowIndices, ColIndices >::value &&internal::traits< typename EIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::ReturnAsIndexedView, typename EIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::type operator()(const RowIndices &rowIndices, const ColIndices &colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
#define EIGEN_HAS_CXX11_ATOMIC
Product< Lhs, Rhs >::Scalar Scalar
int omp_get_thread_num(void)
#define EIGEN_UNUSED_VARIABLE(var)
gebp_traits< LhsScalar, RhsScalar > Traits