10 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H    11 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H    44 template <
typename Scalar, 
typename Index,
    45           int Mode, 
bool LhsIsTriangular,
    46           int LhsStorageOrder, 
bool ConjugateLhs,
    47           int RhsStorageOrder, 
bool ConjugateRhs,
    51 template <
typename Scalar, 
typename Index,
    52           int Mode, 
bool LhsIsTriangular,
    53           int LhsStorageOrder, 
bool ConjugateLhs,
    54           int RhsStorageOrder, 
bool ConjugateRhs, 
int Version>
    56                                            LhsStorageOrder,ConjugateLhs,
    57                                            RhsStorageOrder,ConjugateRhs,
RowMajor,Version>
    60     Index rows, Index cols, Index depth,
    61     const Scalar* lhs, Index lhsStride,
    62     const Scalar* rhs, Index rhsStride,
    63     Scalar* res,       Index resStride,
    71       LhsStorageOrder==RowMajor ? 
ColMajor : RowMajor,
    74       ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
    79 template <
typename Scalar, 
typename Index, 
int Mode,
    80           int LhsStorageOrder, 
bool ConjugateLhs,
    81           int RhsStorageOrder, 
bool ConjugateRhs, 
int Version>
    83                                            LhsStorageOrder,ConjugateLhs,
    84                                            RhsStorageOrder,ConjugateRhs,
ColMajor,Version>
    95     Index _rows, Index _cols, Index _depth,
    96     const Scalar* _lhs, Index lhsStride,
    97     const Scalar* _rhs, Index rhsStride,
    98     Scalar* res,        Index resStride,
   102 template <
typename Scalar, 
typename Index, 
int Mode,
   103           int LhsStorageOrder, 
bool ConjugateLhs,
   104           int RhsStorageOrder, 
bool ConjugateRhs, 
int Version>
   106                                                         LhsStorageOrder,ConjugateLhs,
   107                                                         RhsStorageOrder,ConjugateRhs,
ColMajor,Version>::run(
   108     Index _rows, Index _cols, Index _depth,
   109     const Scalar* _lhs, Index lhsStride,
   110     const Scalar* _rhs, Index rhsStride,
   111     Scalar* _res,        Index resStride,
   115     Index diagSize  = (
std::min)(_rows,_depth);
   116     Index rows      = IsLower ? _rows : diagSize;
   117     Index depth     = IsLower ? diagSize : _depth;
   123     LhsMapper lhs(_lhs,lhsStride);
   124     RhsMapper rhs(_rhs,rhsStride);
   125     ResMapper res(_res, resStride);
   127     Index kc = blocking.
kc();                   
   134     std::size_t sizeA = kc*mc;
   135     std::size_t sizeB = kc*cols;
   143       triangularBuffer.diagonal().setZero();
   145       triangularBuffer.diagonal().setOnes();
   151     for(Index k2=IsLower ? depth : 0;
   152         IsLower ? k2>0 : k2<depth;
   153         IsLower ? k2-=kc : k2+=kc)
   155       Index actual_kc = (
std::min)(IsLower ? k2 : depth-k2, kc);
   156       Index actual_k2 = IsLower ? k2-actual_kc : k2;
   159       if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
   162         k2 = k2+actual_kc-kc;
   165       pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
   173       if(IsLower || actual_k2<rows)
   176         for (Index k1=0; k1<actual_kc; k1+=panelWidth)
   178           Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
   179           Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
   180           Index startBlock   = actual_k2+k1;
   181           Index blockBOffset = k1;
   186           for (Index k=0;k<actualPanelWidth;++k)
   189               triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
   190             for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
   191               triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
   193           pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
   195           gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
   196                       actualPanelWidth, actualPanelWidth, cols, alpha,
   197                       actualPanelWidth, actual_kc, 0, blockBOffset);
   202             Index startTarget  = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
   204             pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
   206             gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
   207                         lengthTarget, actualPanelWidth, cols, alpha,
   208                         actualPanelWidth, actual_kc, 0, blockBOffset);
   214         Index 
start = IsLower ? k2 : 0;
   215         Index end   = IsLower ? rows : (
std::min)(actual_k2,rows);
   216         for(Index i2=start; i2<end; i2+=mc)
   218           const Index actual_mc = (
std::min)(i2+mc,end)-i2;
   220             (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
   222           gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
   223                       actual_kc, cols, alpha, -1, -1, 0, 0);
   230 template <
typename Scalar, 
typename Index, 
int Mode,
   231           int LhsStorageOrder, 
bool ConjugateLhs,
   232           int RhsStorageOrder, 
bool ConjugateRhs, 
int Version>
   234                                         LhsStorageOrder,ConjugateLhs,
   235                                         RhsStorageOrder,ConjugateRhs,ColMajor,Version>
   245     Index _rows, Index _cols, Index _depth,
   246     const Scalar* _lhs, Index lhsStride,
   247     const Scalar* _rhs, Index rhsStride,
   248     Scalar* res,        Index resStride,
   252 template <
typename Scalar, 
typename Index, 
int Mode,
   253           int LhsStorageOrder, 
bool ConjugateLhs,
   254           int RhsStorageOrder, 
bool ConjugateRhs, 
int Version>
   256                                                         LhsStorageOrder,ConjugateLhs,
   257                                                         RhsStorageOrder,ConjugateRhs,
ColMajor,Version>::run(
   258     Index _rows, Index _cols, Index _depth,
   259     const Scalar* _lhs, Index lhsStride,
   260     const Scalar* _rhs, Index rhsStride,
   261     Scalar* _res,        Index resStride,
   266     Index diagSize  = (
std::min)(_cols,_depth);
   268     Index depth     = IsLower ? _depth : diagSize;
   269     Index cols      = IsLower ? diagSize : _cols;
   274     LhsMapper lhs(_lhs,lhsStride);
   275     RhsMapper rhs(_rhs,rhsStride);
   276     ResMapper res(_res, resStride);
   278     Index kc = blocking.
kc();                   
   281     std::size_t sizeA = kc*mc;
   290       triangularBuffer.diagonal().setZero();
   292       triangularBuffer.diagonal().setOnes();
   299     for(Index k2=IsLower ? 0 : depth;
   300         IsLower ? k2<depth  : k2>0;
   301         IsLower ? k2+=kc   : k2-=kc)
   303       Index actual_kc = (
std::min)(IsLower ? depth-k2 : k2, kc);
   304       Index actual_k2 = IsLower ? k2 : k2-actual_kc;
   307       if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
   310         k2 = actual_k2 + actual_kc - kc;
   314       Index rs = IsLower ? (
std::min)(cols,actual_k2) : cols - k2;
   316       Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
   318       Scalar* geb = blockB+ts*ts;
   319       geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/
sizeof(Scalar));
   321       pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
   326         for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
   328           Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
   329           Index actual_j2 = actual_k2 + j2;
   330           Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
   331           Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
   333           pack_rhs_panel(blockB+j2*actual_kc,
   334                          rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
   335                          panelLength, actualPanelWidth,
   336                          actual_kc, panelOffset);
   339           for (Index j=0;j<actualPanelWidth;++j)
   342               triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
   343             for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
   344               triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
   347           pack_rhs_panel(blockB+j2*actual_kc,
   348                          RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
   349                          actualPanelWidth, actualPanelWidth,
   354       for (Index i2=0; i2<rows; i2+=mc)
   356         const Index actual_mc = (
std::min)(mc,rows-i2);
   357         pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
   362           for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
   364             Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
   365             Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
   366             Index blockOffset = IsLower ? j2 : 0;
   368             gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
   369                         blockA, blockB+j2*actual_kc,
   370                         actual_mc, panelLength, actualPanelWidth,
   372                         actual_kc, actual_kc,  
   373                         blockOffset, blockOffset);
   376         gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
   377                     blockA, geb, actual_mc, actual_kc, rs,
   391 template<
int Mode, 
bool LhsIsTriangular, 
typename Lhs, 
typename Rhs>
   394   template<
typename Dest> 
static void run(Dest& dst, 
const Lhs &a_lhs, 
const Rhs &a_rhs, 
const typename Dest::Scalar& alpha)
   396     typedef typename Dest::Scalar     Scalar;
   399     typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
   402     typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
   408     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
   409                                * RhsBlasTraits::extractScalarFactor(a_rhs);
   412               Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
   415     Index stripedRows  = ((!LhsIsTriangular) || (IsLower))  ? lhs.rows() : (
std::min)(lhs.rows(),lhs.cols());
   416     Index stripedCols  = ((LhsIsTriangular)  || (!IsLower)) ? rhs.cols() : (
std::min)(rhs.cols(),rhs.rows());
   417     Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (
std::min)(lhs.cols(),lhs.rows()))
   418                                          : ((IsLower)  ? rhs.rows() : (
std::min)(rhs.rows(),rhs.cols()));
   420     BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, 
false);
   423       Mode, LhsIsTriangular,
   428         stripedRows, stripedCols, stripedDepth,   
   429         &lhs.coeffRef(0,0), lhs.outerStride(),    
   430         &rhs.coeffRef(0,0), rhs.outerStride(),    
   431         &dst.coeffRef(0,0), dst.outerStride(),    
   432         actualAlpha, blocking
   441 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H 
#define EIGEN_STRONG_INLINE
#define EIGEN_MAX_ALIGN_BYTES
gebp_traits< Scalar, Scalar > Traits
#define EIGEN_PLAIN_ENUM_MAX(a, b)
gebp_traits< Scalar, Scalar > Traits
const unsigned int RowMajorBit
#define EIGEN_DONT_INLINE
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API. 
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resStride, const Scalar &alpha, level3_blocking< Scalar, Scalar > &blocking)
EIGEN_DEVICE_FUNC Derived & setZero(Index size)
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar &alpha)
The matrix class, also used for vectors and row-vectors.