#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy of the columns left of the diagonal block
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal

    // symmetric copy of the diagonal block itself
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = conj(lhs(k, i+w));    // transposed
      blockA[count++] = real(lhs(k,k));         // real (diagonal)
      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }

    // transposed copy of the columns right of the diagonal block
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = conj(lhs(k, i+w));    // transposed
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // handle the remaining rows with a block height of 1
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);            // normal
      blockA[count++] = real(lhs(i, i));        // real (diagonal)
      for(Index k=i+1; k<cols; k++)
        blockA[count++] = conj(lhs(k, i));      // transposed
    }
  }
};
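
// A minimal sketch of how symm_pack_lhs could be exercised on its own; the
// 4x4 size and the Pack1=2/Pack2=1 parameters are illustrative choices, not
// the values the product kernel actually selects (those come from gebp_traits):
//
//   Eigen::Matrix4d A;   // only the lower triangle is referenced
//   A.setRandom();
//   double blockA[16];
//   Eigen::internal::symm_pack_lhs<double,int,2,1,Eigen::ColMajor> pack;
//   pack(blockA, A.data(), A.outerStride(), /*cols=*/4, /*rows=*/4);
//   // blockA now holds the full, symmetrized panel, interleaved in
//   // Pack1-high row chunks as expected by gebp_kernel.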

template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: columns left of the k2..k2+rows block are copied as-is
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: the strip of columns crossing the diagonal; each nr-wide
    // strip splits vertically into a transposed part, a symmetric (diagonal)
    // part, and a normal part
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // transpose
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
      // symmetric
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);
        blockB[count+h] = real(rhs(k,k));
        // transpose
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: columns right of the block are read transposed/conjugated
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
        blockB[count++] = conj(rhs(j2,k));

      if(half==j2 && half<k2+rows)
        blockB[count++] = real(rhs(j2,j2));
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
        blockB[count++] = rhs(k,j2);
    }
  }
};
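
// A descriptive sketch of what symm_pack_rhs produces: for a row block
// k2..k2+rows of a self-adjoint rhs, coefficients left of the block come
// straight from storage, the strip crossing the diagonal mixes stored and
// mirrored values, and everything right of the block is mirrored:
//
//        0 .. k2        k2 .. k2+rows       k2+rows .. cols
//      [  normal   |  normal/diag/conj  |  conj-transposed  ]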

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs,
                                  RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    // a row-major destination is handled by computing the transposed product
    // into it: res^T = (lhs * rhs)^T = rhs^T * lhs^T, which swaps the roles
    // (and the conjugation flags) of the two sides
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};
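
// For instance (an illustrative sketch, not code from this file), a product
// written into a row-major destination ends up in the specialization above:
//
//   using RowMat = Eigen::Matrix<double,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor>;
//   RowMat C(n,n);
//   C.noalias() = A.selfadjointView<Eigen::Lower>() * B;   // A, B column-major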

// selfadjoint matrix * general matrix, col-major result
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs,
                                  RhsStorageOrder,false,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs,
                                                  RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size; // cache block size along the K direction
    Index mc = rows; // cache block size along the M direction
    Index nc = cols; // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must be smaller than mc
    kc = (std::min)(kc,mc);

    // one buffer: the gebp workspace (sizeW scalars) followed by the packed rhs
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress,
                  LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs;
      // pack rhs's panel into a sequential chunk of memory
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs's panel has to be split in three different parts:
      //  1 - the part above the diagonal block => transposed packed copy
      //  2 - the diagonal block                => special packed copy
      //  3 - the part below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the diagonal block
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the part below the diagonal block
      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder, false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
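
// Shape of the three-part split above for one panel of a lower self-adjoint
// lhs (columns k2..k2+kc; x = stored coefficient, t = coefficient read
// transposed/conjugated from the other triangle):
//
//              k2    k2+kc
//    0       [  t t  ]   part 1: above the diagonal block
//    k2      [  x .  ]   part 2: the diagonal block (symmetrized)
//            [  x x  ]
//    k2+kc   [  x x  ]   part 3: below the diagonal block
//    size    [  x x  ]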

// general matrix * selfadjoint matrix, col-major result
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs,
                                  RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs,
                                                  RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size; // cache block size along the K direction
    Index mc = rows; // cache block size along the M direction
    Index nc = cols; // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // symmetric packing of the rhs panel: symm_pack_rhs handles the
      // diagonal crossing internally, so the rest is a plain GEPP sweep
      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

} // end namespace internal
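
// A usage sketch for this matrix * selfadjoint path (illustrative, with
// hypothetical matrices B and A; A self-adjoint with its upper triangle
// stored):
//
//   Eigen::MatrixXd C(m,n), B(m,n), A(n,n);
//   C.noalias() = B * A.selfadjointView<Eigen::Upper>();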

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}
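
// The specialization below is what an expression such as the following
// ultimately dispatches to (a sketch; A, B, C are hypothetical dynamic
// matrices and s a scalar):
//
//   C.noalias() += s * A.selfadjointView<Eigen::Lower>() * B;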

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    // fold the scalar factors of both sides into a single alpha
    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper, internal::traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
      LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper, internal::traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
      RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // alpha
      );
  }
};

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H