00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
00026 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
00027
00028 namespace internal {
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 template <typename Scalar, typename Index,
00058 int Mode, bool LhsIsTriangular,
00059 int LhsStorageOrder, bool ConjugateLhs,
00060 int RhsStorageOrder, bool ConjugateRhs,
00061 int ResStorageOrder>
00062 struct product_triangular_matrix_matrix;
00063
00064 template <typename Scalar, typename Index,
00065 int Mode, bool LhsIsTriangular,
00066 int LhsStorageOrder, bool ConjugateLhs,
00067 int RhsStorageOrder, bool ConjugateRhs>
00068 struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
00069 LhsStorageOrder,ConjugateLhs,
00070 RhsStorageOrder,ConjugateRhs,RowMajor>
00071 {
00072 static EIGEN_STRONG_INLINE void run(
00073 Index rows, Index cols, Index depth,
00074 const Scalar* lhs, Index lhsStride,
00075 const Scalar* rhs, Index rhsStride,
00076 Scalar* res, Index resStride,
00077 Scalar alpha)
00078 {
00079 product_triangular_matrix_matrix<Scalar, Index,
00080 (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
00081 (!LhsIsTriangular),
00082 RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
00083 ConjugateRhs,
00084 LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
00085 ConjugateLhs,
00086 ColMajor>
00087 ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
00088 }
00089 };
00090
00091
00092 template <typename Scalar, typename Index, int Mode,
00093 int LhsStorageOrder, bool ConjugateLhs,
00094 int RhsStorageOrder, bool ConjugateRhs>
00095 struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
00096 LhsStorageOrder,ConjugateLhs,
00097 RhsStorageOrder,ConjugateRhs,ColMajor>
00098 {
00099
00100 typedef gebp_traits<Scalar,Scalar> Traits;
00101 enum {
00102 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
00103 IsLower = (Mode&Lower) == Lower,
00104 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
00105 };
00106
00107 static EIGEN_DONT_INLINE void run(
00108 Index _rows, Index _cols, Index _depth,
00109 const Scalar* _lhs, Index lhsStride,
00110 const Scalar* _rhs, Index rhsStride,
00111 Scalar* res, Index resStride,
00112 Scalar alpha)
00113 {
00114
00115 Index diagSize = std::min(_rows,_depth);
00116 Index rows = IsLower ? _rows : diagSize;
00117 Index depth = IsLower ? diagSize : _depth;
00118 Index cols = _cols;
00119
00120 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00121 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00122
00123 Index kc = depth;
00124 Index mc = rows;
00125 Index nc = cols;
00126 computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
00127 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00128 std::size_t sizeB = sizeW + kc*cols;
00129 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
00130 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
00131 Scalar* blockB = allocatedBlockB + sizeW;
00132
00133 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
00134 triangularBuffer.setZero();
00135 if((Mode&ZeroDiag)==ZeroDiag)
00136 triangularBuffer.diagonal().setZero();
00137 else
00138 triangularBuffer.diagonal().setOnes();
00139
00140 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00141 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00142 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00143
00144 for(Index k2=IsLower ? depth : 0;
00145 IsLower ? k2>0 : k2<depth;
00146 IsLower ? k2-=kc : k2+=kc)
00147 {
00148 Index actual_kc = std::min(IsLower ? k2 : depth-k2, kc);
00149 Index actual_k2 = IsLower ? k2-actual_kc : k2;
00150
00151
00152 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
00153 {
00154 actual_kc = rows-k2;
00155 k2 = k2+actual_kc-kc;
00156 }
00157
00158 pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
00159
00160
00161
00162
00163
00164
00165
00166 if(IsLower || actual_k2<rows)
00167 {
00168
00169 for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
00170 {
00171 Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
00172 Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
00173 Index startBlock = actual_k2+k1;
00174 Index blockBOffset = k1;
00175
00176
00177
00178
00179 for (Index k=0;k<actualPanelWidth;++k)
00180 {
00181 if (SetDiag)
00182 triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
00183 for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
00184 triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
00185 }
00186 pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
00187
00188 gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
00189 actualPanelWidth, actual_kc, 0, blockBOffset);
00190
00191
00192 if (lengthTarget>0)
00193 {
00194 Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
00195
00196 pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
00197
00198 gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
00199 actualPanelWidth, actual_kc, 0, blockBOffset);
00200 }
00201 }
00202 }
00203
00204 {
00205 Index start = IsLower ? k2 : 0;
00206 Index end = IsLower ? rows : std::min(actual_k2,rows);
00207 for(Index i2=start; i2<end; i2+=mc)
00208 {
00209 const Index actual_mc = std::min(i2+mc,end)-i2;
00210 gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
00211 (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
00212
00213 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
00214 }
00215 }
00216 }
00217 }
00218 };
00219
00220
00221 template <typename Scalar, typename Index, int Mode,
00222 int LhsStorageOrder, bool ConjugateLhs,
00223 int RhsStorageOrder, bool ConjugateRhs>
00224 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
00225 LhsStorageOrder,ConjugateLhs,
00226 RhsStorageOrder,ConjugateRhs,ColMajor>
00227 {
00228 typedef gebp_traits<Scalar,Scalar> Traits;
00229 enum {
00230 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
00231 IsLower = (Mode&Lower) == Lower,
00232 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
00233 };
00234
00235 static EIGEN_DONT_INLINE void run(
00236 Index _rows, Index _cols, Index _depth,
00237 const Scalar* _lhs, Index lhsStride,
00238 const Scalar* _rhs, Index rhsStride,
00239 Scalar* res, Index resStride,
00240 Scalar alpha)
00241 {
00242
00243 Index diagSize = std::min(_cols,_depth);
00244 Index rows = _rows;
00245 Index depth = IsLower ? _depth : diagSize;
00246 Index cols = IsLower ? diagSize : _cols;
00247
00248 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00249 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00250
00251 Index kc = depth;
00252 Index mc = rows;
00253 Index nc = cols;
00254 computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
00255
00256 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00257 std::size_t sizeB = sizeW + kc*cols;
00258 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
00259 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
00260 Scalar* blockB = allocatedBlockB + sizeW;
00261
00262 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
00263 triangularBuffer.setZero();
00264 if((Mode&ZeroDiag)==ZeroDiag)
00265 triangularBuffer.diagonal().setZero();
00266 else
00267 triangularBuffer.diagonal().setOnes();
00268
00269 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00270 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00271 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00272 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
00273
00274 for(Index k2=IsLower ? 0 : depth;
00275 IsLower ? k2<depth : k2>0;
00276 IsLower ? k2+=kc : k2-=kc)
00277 {
00278 Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc);
00279 Index actual_k2 = IsLower ? k2 : k2-actual_kc;
00280
00281
00282 if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
00283 {
00284 actual_kc = cols-k2;
00285 k2 = actual_k2 + actual_kc - kc;
00286 }
00287
00288
00289 Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2;
00290
00291 Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
00292
00293 Scalar* geb = blockB+ts*ts;
00294
00295 pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
00296
00297
00298 if(ts>0)
00299 {
00300 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
00301 {
00302 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
00303 Index actual_j2 = actual_k2 + j2;
00304 Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
00305 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
00306
00307 pack_rhs_panel(blockB+j2*actual_kc,
00308 &rhs(actual_k2+panelOffset, actual_j2), rhsStride,
00309 panelLength, actualPanelWidth,
00310 actual_kc, panelOffset);
00311
00312
00313 for (Index j=0;j<actualPanelWidth;++j)
00314 {
00315 if (SetDiag)
00316 triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
00317 for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
00318 triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
00319 }
00320
00321 pack_rhs_panel(blockB+j2*actual_kc,
00322 triangularBuffer.data(), triangularBuffer.outerStride(),
00323 actualPanelWidth, actualPanelWidth,
00324 actual_kc, j2);
00325 }
00326 }
00327
00328 for (Index i2=0; i2<rows; i2+=mc)
00329 {
00330 const Index actual_mc = std::min(mc,rows-i2);
00331 pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
00332
00333
00334 if(ts>0)
00335 {
00336 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
00337 {
00338 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
00339 Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
00340 Index blockOffset = IsLower ? j2 : 0;
00341
00342 gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
00343 blockA, blockB+j2*actual_kc,
00344 actual_mc, panelLength, actualPanelWidth,
00345 alpha,
00346 actual_kc, actual_kc,
00347 blockOffset, blockOffset,
00348 allocatedBlockB);
00349 }
00350 }
00351 gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
00352 blockA, geb, actual_mc, actual_kc, rs,
00353 alpha,
00354 -1, -1, 0, 0, allocatedBlockB);
00355 }
00356 }
00357 }
00358 };
00359
00360
00361
00362
00363
00364 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
00365 struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
00366 : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
00367 {};
00368
00369 }
00370
00371 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
00372 struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
00373 : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
00374 {
00375 EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
00376
00377 TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00378
00379 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
00380 {
00381 const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
00382 const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
00383
00384 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00385 * RhsBlasTraits::extractScalarFactor(m_rhs);
00386
00387 internal::product_triangular_matrix_matrix<Scalar, Index,
00388 Mode, LhsIsTriangular,
00389 (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
00390 (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
00391 (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
00392 ::run(
00393 lhs.rows(), rhs.cols(), lhs.cols(),
00394 &lhs.coeffRef(0,0), lhs.outerStride(),
00395 &rhs.coeffRef(0,0), rhs.outerStride(),
00396 &dst.coeffRef(0,0), dst.outerStride(),
00397 actualAlpha
00398 );
00399 }
00400 };
00401
00402
00403 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H