10 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H 
   11 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H 
   18 template<
typename Scalar, 
typename Index, 
int Pack1, 
int Pack2_dummy, 
int StorageOrder>
 
   21   template<
int BlockRows> 
inline 
   25     for(
Index k=0; k<i; k++)
 
   26       for(
Index w=0; w<BlockRows; w++)
 
   27         blockA[count++] = lhs(i+w,k);           
 
   30     for(
Index k=i; k<i+BlockRows; k++)
 
   32       for(
Index w=0; w<h; w++)
 
   37       for(
Index w=h+1; w<BlockRows; w++)
 
   38         blockA[count++] = lhs(i+w, k);          
 
   42     for(
Index k=i+BlockRows; k<cols; k++)
 
   43       for(
Index w=0; w<BlockRows; w++)
 
   53     const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
 
   54     const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
 
   55     const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
 
   57     if(Pack1>=3*PacketSize)
 
   58       for(
Index i=0; i<peeled_mc3; i+=3*PacketSize)
 
   59         pack<3*PacketSize>(blockA, lhs, cols, i, count);
 
   61     if(Pack1>=2*PacketSize)
 
   62       for(
Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
 
   63         pack<2*PacketSize>(blockA, lhs, cols, i, count);
 
   65     if(Pack1>=1*PacketSize)
 
   66       for(
Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
 
   67         pack<1*PacketSize>(blockA, lhs, cols, i, count);
 
   70     for(
Index i=peeled_mc1; i<rows; i++)
 
   72       for(
Index k=0; k<i; k++)
 
   73         blockA[count++] = lhs(i, k);                   
 
   77       for(
Index k=i+1; k<cols; k++)
 
   83 template<
typename Scalar, 
typename Index, 
int nr, 
int StorageOrder>
 
   89     Index end_k = k2 + rows;
 
   92     Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
 
   93     Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
 
   96     for(
Index j2=0; j2<k2; j2+=nr)
 
   98       for(
Index k=k2; k<end_k; k++)
 
  100         blockB[count+0] = rhs(k,j2+0);
 
  101         blockB[count+1] = rhs(k,j2+1);
 
  104           blockB[count+2] = rhs(k,j2+2);
 
  105           blockB[count+3] = rhs(k,j2+3);
 
  109           blockB[count+4] = rhs(k,j2+4);
 
  110           blockB[count+5] = rhs(k,j2+5);
 
  111           blockB[count+6] = rhs(k,j2+6);
 
  112           blockB[count+7] = rhs(k,j2+7);
 
  122       for(
Index j2=k2; j2<end8; j2+=8)
 
  126         for(
Index k=k2; k<j2; k++)
 
  140         for(
Index k=j2; k<j2+8; k++)
 
  143           for (
Index w=0 ; w<h; ++w)
 
  144             blockB[count+w] = rhs(k,j2+w);
 
  149           for (
Index w=h+1 ; w<8; ++w)
 
  155         for(
Index k=j2+8; k<end_k; k++)
 
  157           blockB[count+0] = rhs(k,j2+0);
 
  158           blockB[count+1] = rhs(k,j2+1);
 
  159           blockB[count+2] = rhs(k,j2+2);
 
  160           blockB[count+3] = rhs(k,j2+3);
 
  161           blockB[count+4] = rhs(k,j2+4);
 
  162           blockB[count+5] = rhs(k,j2+5);
 
  163           blockB[count+6] = rhs(k,j2+6);
 
  164           blockB[count+7] = rhs(k,j2+7);
 
  171       for(
Index j2=end8; j2<(
std::min)(k2+rows,packet_cols4); j2+=4)
 
  175         for(
Index k=k2; k<j2; k++)
 
  185         for(
Index k=j2; k<j2+4; k++)
 
  188           for (
Index w=0 ; w<h; ++w)
 
  189             blockB[count+w] = rhs(k,j2+w);
 
  194           for (
Index w=h+1 ; w<4; ++w)
 
  200         for(
Index k=j2+4; k<end_k; k++)
 
  202           blockB[count+0] = rhs(k,j2+0);
 
  203           blockB[count+1] = rhs(k,j2+1);
 
  204           blockB[count+2] = rhs(k,j2+2);
 
  205           blockB[count+3] = rhs(k,j2+3);
 
  214       for(
Index j2=k2+rows; j2<packet_cols8; j2+=8)
 
  216         for(
Index k=k2; k<end_k; k++)
 
  232       for(
Index j2=(
std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
 
  234         for(
Index k=k2; k<end_k; k++)
 
  246     for(
Index j2=packet_cols4; j2<cols; ++j2)
 
  267         blockB[count] = rhs(k,j2);
 
  278           int LhsStorageOrder, 
bool LhsSelfAdjoint, 
bool ConjugateLhs,
 
  279           int RhsStorageOrder, 
bool RhsSelfAdjoint, 
bool ConjugateRhs,
 
  284           int LhsStorageOrder, 
bool LhsSelfAdjoint, 
bool ConjugateLhs,
 
  285           int RhsStorageOrder, 
bool RhsSelfAdjoint, 
bool ConjugateRhs>
 
  302       ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  
alpha, blocking);
 
  307           int LhsStorageOrder, 
bool ConjugateLhs,
 
  308           int RhsStorageOrder, 
bool ConjugateRhs>
 
  321           int LhsStorageOrder, 
bool ConjugateLhs,
 
  322           int RhsStorageOrder, 
bool ConjugateRhs>
 
  338     LhsMapper lhs(_lhs,lhsStride);
 
  339     LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
 
  340     RhsMapper rhs(_rhs,rhsStride);
 
  341     ResMapper res(_res, resStride);
 
  347     std::size_t sizeA = kc*mc;
 
  348     std::size_t sizeB = kc*cols;
 
  364       pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
 
  370       for(
Index i2=0; i2<k2; i2+=mc)
 
  374         pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
 
  376         gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, 
alpha);
 
  382         pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
 
  384         gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, 
alpha);
 
  391           (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
 
  393         gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, 
alpha);
 
  400           int LhsStorageOrder, 
bool ConjugateLhs,
 
  401           int RhsStorageOrder, 
bool ConjugateRhs>
 
  414           int LhsStorageOrder, 
bool ConjugateLhs,
 
  415           int RhsStorageOrder, 
bool ConjugateRhs>
 
  429     LhsMapper lhs(_lhs,lhsStride);
 
  430     ResMapper res(_res,resStride);
 
  434     std::size_t sizeA = kc*mc;
 
  435     std::size_t sizeB = kc*cols;
 
  447       pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
 
  450       for(
Index i2=0; i2<rows; i2+=mc)
 
  453         pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
 
  455         gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, 
alpha);
 
  468 template<
typename Lhs, 
int LhsMode, 
typename Rhs, 
int RhsMode>
 
  469 struct selfadjoint_product_impl<
Lhs,LhsMode,false,
Rhs,RhsMode,false>
 
  485   template<
typename Dest>
 
  488     eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
 
  493     Scalar actualAlpha = 
alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
 
  494                                * RhsBlasTraits::extractScalarFactor(a_rhs);
 
  497               Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
 
  499     BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, 
false);
 
  508         lhs.rows(), rhs.cols(),                 
 
  509         &lhs.coeffRef(0,0), lhs.outerStride(),  
 
  510         &rhs.coeffRef(0,0), rhs.outerStride(),  
 
  511         &dst.coeffRef(0,0), dst.outerStride(),  
 
  512         actualAlpha, blocking                   
 
  521 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H