00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
00011 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
00012
00013 namespace Eigen {
00014
00015 namespace internal {
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 template <typename Scalar, typename Index,
00045 int Mode, bool LhsIsTriangular,
00046 int LhsStorageOrder, bool ConjugateLhs,
00047 int RhsStorageOrder, bool ConjugateRhs,
00048 int ResStorageOrder, int Version = Specialized>
00049 struct product_triangular_matrix_matrix;
00050
00051 template <typename Scalar, typename Index,
00052 int Mode, bool LhsIsTriangular,
00053 int LhsStorageOrder, bool ConjugateLhs,
00054 int RhsStorageOrder, bool ConjugateRhs, int Version>
00055 struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
00056 LhsStorageOrder,ConjugateLhs,
00057 RhsStorageOrder,ConjugateRhs,RowMajor,Version>
00058 {
00059 static EIGEN_STRONG_INLINE void run(
00060 Index rows, Index cols, Index depth,
00061 const Scalar* lhs, Index lhsStride,
00062 const Scalar* rhs, Index rhsStride,
00063 Scalar* res, Index resStride,
00064 Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
00065 {
00066 product_triangular_matrix_matrix<Scalar, Index,
00067 (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
00068 (!LhsIsTriangular),
00069 RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
00070 ConjugateRhs,
00071 LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
00072 ConjugateLhs,
00073 ColMajor>
00074 ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
00075 }
00076 };
00077
00078
00079 template <typename Scalar, typename Index, int Mode,
00080 int LhsStorageOrder, bool ConjugateLhs,
00081 int RhsStorageOrder, bool ConjugateRhs, int Version>
00082 struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
00083 LhsStorageOrder,ConjugateLhs,
00084 RhsStorageOrder,ConjugateRhs,ColMajor,Version>
00085 {
00086
00087 typedef gebp_traits<Scalar,Scalar> Traits;
00088 enum {
00089 SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
00090 IsLower = (Mode&Lower) == Lower,
00091 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
00092 };
00093
00094 static EIGEN_DONT_INLINE void run(
00095 Index _rows, Index _cols, Index _depth,
00096 const Scalar* _lhs, Index lhsStride,
00097 const Scalar* _rhs, Index rhsStride,
00098 Scalar* res, Index resStride,
00099 Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
00100 {
00101
00102 Index diagSize = (std::min)(_rows,_depth);
00103 Index rows = IsLower ? _rows : diagSize;
00104 Index depth = IsLower ? diagSize : _depth;
00105 Index cols = _cols;
00106
00107 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00108 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00109
00110 Index kc = blocking.kc();
00111 Index mc = (std::min)(rows,blocking.mc());
00112
00113 std::size_t sizeA = kc*mc;
00114 std::size_t sizeB = kc*cols;
00115 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00116
00117 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
00118 ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
00119 ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
00120
00121 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
00122 triangularBuffer.setZero();
00123 if((Mode&ZeroDiag)==ZeroDiag)
00124 triangularBuffer.diagonal().setZero();
00125 else
00126 triangularBuffer.diagonal().setOnes();
00127
00128 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00129 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00130 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00131
00132 for(Index k2=IsLower ? depth : 0;
00133 IsLower ? k2>0 : k2<depth;
00134 IsLower ? k2-=kc : k2+=kc)
00135 {
00136 Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
00137 Index actual_k2 = IsLower ? k2-actual_kc : k2;
00138
00139
00140 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
00141 {
00142 actual_kc = rows-k2;
00143 k2 = k2+actual_kc-kc;
00144 }
00145
00146 pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
00147
00148
00149
00150
00151
00152
00153
00154 if(IsLower || actual_k2<rows)
00155 {
00156
00157 for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
00158 {
00159 Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
00160 Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
00161 Index startBlock = actual_k2+k1;
00162 Index blockBOffset = k1;
00163
00164
00165
00166
00167 for (Index k=0;k<actualPanelWidth;++k)
00168 {
00169 if (SetDiag)
00170 triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
00171 for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
00172 triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
00173 }
00174 pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
00175
00176 gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
00177 actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
00178
00179
00180 if (lengthTarget>0)
00181 {
00182 Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
00183
00184 pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
00185
00186 gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
00187 actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
00188 }
00189 }
00190 }
00191
00192 {
00193 Index start = IsLower ? k2 : 0;
00194 Index end = IsLower ? rows : (std::min)(actual_k2,rows);
00195 for(Index i2=start; i2<end; i2+=mc)
00196 {
00197 const Index actual_mc = (std::min)(i2+mc,end)-i2;
00198 gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
00199 (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
00200
00201 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
00202 }
00203 }
00204 }
00205 }
00206 };
00207
00208
00209 template <typename Scalar, typename Index, int Mode,
00210 int LhsStorageOrder, bool ConjugateLhs,
00211 int RhsStorageOrder, bool ConjugateRhs, int Version>
00212 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
00213 LhsStorageOrder,ConjugateLhs,
00214 RhsStorageOrder,ConjugateRhs,ColMajor,Version>
00215 {
00216 typedef gebp_traits<Scalar,Scalar> Traits;
00217 enum {
00218 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
00219 IsLower = (Mode&Lower) == Lower,
00220 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
00221 };
00222
00223 static EIGEN_DONT_INLINE void run(
00224 Index _rows, Index _cols, Index _depth,
00225 const Scalar* _lhs, Index lhsStride,
00226 const Scalar* _rhs, Index rhsStride,
00227 Scalar* res, Index resStride,
00228 Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
00229 {
00230
00231 Index diagSize = (std::min)(_cols,_depth);
00232 Index rows = _rows;
00233 Index depth = IsLower ? _depth : diagSize;
00234 Index cols = IsLower ? diagSize : _cols;
00235
00236 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00237 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00238
00239 Index kc = blocking.kc();
00240 Index mc = (std::min)(rows,blocking.mc());
00241
00242 std::size_t sizeA = kc*mc;
00243 std::size_t sizeB = kc*cols;
00244 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00245
00246 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
00247 ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
00248 ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
00249
00250 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
00251 triangularBuffer.setZero();
00252 if((Mode&ZeroDiag)==ZeroDiag)
00253 triangularBuffer.diagonal().setZero();
00254 else
00255 triangularBuffer.diagonal().setOnes();
00256
00257 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00258 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00259 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00260 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
00261
00262 for(Index k2=IsLower ? 0 : depth;
00263 IsLower ? k2<depth : k2>0;
00264 IsLower ? k2+=kc : k2-=kc)
00265 {
00266 Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
00267 Index actual_k2 = IsLower ? k2 : k2-actual_kc;
00268
00269
00270 if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
00271 {
00272 actual_kc = cols-k2;
00273 k2 = actual_k2 + actual_kc - kc;
00274 }
00275
00276
00277 Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
00278
00279 Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
00280
00281 Scalar* geb = blockB+ts*ts;
00282
00283 pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
00284
00285
00286 if(ts>0)
00287 {
00288 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
00289 {
00290 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
00291 Index actual_j2 = actual_k2 + j2;
00292 Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
00293 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
00294
00295 pack_rhs_panel(blockB+j2*actual_kc,
00296 &rhs(actual_k2+panelOffset, actual_j2), rhsStride,
00297 panelLength, actualPanelWidth,
00298 actual_kc, panelOffset);
00299
00300
00301 for (Index j=0;j<actualPanelWidth;++j)
00302 {
00303 if (SetDiag)
00304 triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
00305 for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
00306 triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
00307 }
00308
00309 pack_rhs_panel(blockB+j2*actual_kc,
00310 triangularBuffer.data(), triangularBuffer.outerStride(),
00311 actualPanelWidth, actualPanelWidth,
00312 actual_kc, j2);
00313 }
00314 }
00315
00316 for (Index i2=0; i2<rows; i2+=mc)
00317 {
00318 const Index actual_mc = (std::min)(mc,rows-i2);
00319 pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
00320
00321
00322 if(ts>0)
00323 {
00324 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
00325 {
00326 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
00327 Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
00328 Index blockOffset = IsLower ? j2 : 0;
00329
00330 gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
00331 blockA, blockB+j2*actual_kc,
00332 actual_mc, panelLength, actualPanelWidth,
00333 alpha,
00334 actual_kc, actual_kc,
00335 blockOffset, blockOffset,
00336 blockW);
00337 }
00338 }
00339 gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
00340 blockA, geb, actual_mc, actual_kc, rs,
00341 alpha,
00342 -1, -1, 0, 0, blockW);
00343 }
00344 }
00345 }
00346 };
00347
00348
00349
00350
00351
00352 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
00353 struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
00354 : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
00355 {};
00356
00357 }
00358
00359 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
00360 struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
00361 : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
00362 {
00363 EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
00364
00365 TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00366
00367 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
00368 {
00369 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
00370 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
00371
00372 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00373 * RhsBlasTraits::extractScalarFactor(m_rhs);
00374
00375 typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
00376 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
00377
00378 enum { IsLower = (Mode&Lower) == Lower };
00379 Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
00380 Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
00381 Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
00382 : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
00383
00384 BlockingType blocking(stripedRows, stripedCols, stripedDepth);
00385
00386 internal::product_triangular_matrix_matrix<Scalar, Index,
00387 Mode, LhsIsTriangular,
00388 (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
00389 (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
00390 (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
00391 ::run(
00392 stripedRows, stripedCols, stripedDepth,
00393 &lhs.coeffRef(0,0), lhs.outerStride(),
00394 &rhs.coeffRef(0,0), rhs.outerStride(),
00395 &dst.coeffRef(0,0), dst.outerStride(),
00396 actualAlpha, blocking
00397 );
00398 }
00399 };
00400
00401 }
00402
00403 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H