00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
00026 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
00027
00028 namespace internal {
00029
00030
00031 template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
00032 struct symm_pack_lhs
00033 {
00034 template<int BlockRows> inline
00035 void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
00036 {
00037
00038 for(Index k=0; k<i; k++)
00039 for(Index w=0; w<BlockRows; w++)
00040 blockA[count++] = lhs(i+w,k);
00041
00042 Index h = 0;
00043 for(Index k=i; k<i+BlockRows; k++)
00044 {
00045 for(Index w=0; w<h; w++)
00046 blockA[count++] = conj(lhs(k, i+w));
00047
00048 blockA[count++] = real(lhs(k,k));
00049
00050 for(Index w=h+1; w<BlockRows; w++)
00051 blockA[count++] = lhs(i+w, k);
00052 ++h;
00053 }
00054
00055 for(Index k=i+BlockRows; k<cols; k++)
00056 for(Index w=0; w<BlockRows; w++)
00057 blockA[count++] = conj(lhs(k, i+w));
00058 }
00059 void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
00060 {
00061 const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
00062 Index count = 0;
00063 Index peeled_mc = (rows/Pack1)*Pack1;
00064 for(Index i=0; i<peeled_mc; i+=Pack1)
00065 {
00066 pack<Pack1>(blockA, lhs, cols, i, count);
00067 }
00068
00069 if(rows-peeled_mc>=Pack2)
00070 {
00071 pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
00072 peeled_mc += Pack2;
00073 }
00074
00075
00076 for(Index i=peeled_mc; i<rows; i++)
00077 {
00078 for(Index k=0; k<i; k++)
00079 blockA[count++] = lhs(i, k);
00080
00081 blockA[count++] = real(lhs(i, i));
00082
00083 for(Index k=i+1; k<cols; k++)
00084 blockA[count++] = conj(lhs(k, i));
00085 }
00086 }
00087 };
00088
00089 template<typename Scalar, typename Index, int nr, int StorageOrder>
00090 struct symm_pack_rhs
00091 {
00092 enum { PacketSize = packet_traits<Scalar>::size };
00093 void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
00094 {
00095 Index end_k = k2 + rows;
00096 Index count = 0;
00097 const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
00098 Index packet_cols = (cols/nr)*nr;
00099
00100
00101 for(Index j2=0; j2<k2; j2+=nr)
00102 {
00103 for(Index k=k2; k<end_k; k++)
00104 {
00105 blockB[count+0] = rhs(k,j2+0);
00106 blockB[count+1] = rhs(k,j2+1);
00107 if (nr==4)
00108 {
00109 blockB[count+2] = rhs(k,j2+2);
00110 blockB[count+3] = rhs(k,j2+3);
00111 }
00112 count += nr;
00113 }
00114 }
00115
00116
00117 for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
00118 {
00119
00120
00121 for(Index k=k2; k<j2; k++)
00122 {
00123 blockB[count+0] = conj(rhs(j2+0,k));
00124 blockB[count+1] = conj(rhs(j2+1,k));
00125 if (nr==4)
00126 {
00127 blockB[count+2] = conj(rhs(j2+2,k));
00128 blockB[count+3] = conj(rhs(j2+3,k));
00129 }
00130 count += nr;
00131 }
00132
00133 Index h = 0;
00134 for(Index k=j2; k<j2+nr; k++)
00135 {
00136
00137 for (Index w=0 ; w<h; ++w)
00138 blockB[count+w] = rhs(k,j2+w);
00139
00140 blockB[count+h] = real(rhs(k,k));
00141
00142
00143 for (Index w=h+1 ; w<nr; ++w)
00144 blockB[count+w] = conj(rhs(j2+w,k));
00145 count += nr;
00146 ++h;
00147 }
00148
00149 for(Index k=j2+nr; k<end_k; k++)
00150 {
00151 blockB[count+0] = rhs(k,j2+0);
00152 blockB[count+1] = rhs(k,j2+1);
00153 if (nr==4)
00154 {
00155 blockB[count+2] = rhs(k,j2+2);
00156 blockB[count+3] = rhs(k,j2+3);
00157 }
00158 count += nr;
00159 }
00160 }
00161
00162
00163 for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
00164 {
00165 for(Index k=k2; k<end_k; k++)
00166 {
00167 blockB[count+0] = conj(rhs(j2+0,k));
00168 blockB[count+1] = conj(rhs(j2+1,k));
00169 if (nr==4)
00170 {
00171 blockB[count+2] = conj(rhs(j2+2,k));
00172 blockB[count+3] = conj(rhs(j2+3,k));
00173 }
00174 count += nr;
00175 }
00176 }
00177
00178
00179 for(Index j2=packet_cols; j2<cols; ++j2)
00180 {
00181
00182 Index half = (std::min)(end_k,j2);
00183 for(Index k=k2; k<half; k++)
00184 {
00185 blockB[count] = conj(rhs(j2,k));
00186 count += 1;
00187 }
00188
00189 if(half==j2 && half<k2+rows)
00190 {
00191 blockB[count] = real(rhs(j2,j2));
00192 count += 1;
00193 }
00194 else
00195 half--;
00196
00197
00198 for(Index k=half+1; k<k2+rows; k++)
00199 {
00200 blockB[count] = rhs(k,j2);
00201 count += 1;
00202 }
00203 }
00204 }
00205 };
00206
00207
00208
00209
00210 template <typename Scalar, typename Index,
00211 int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
00212 int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
00213 int ResStorageOrder>
00214 struct product_selfadjoint_matrix;
00215
00216 template <typename Scalar, typename Index,
00217 int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
00218 int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
00219 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
00220 {
00221
00222 static EIGEN_STRONG_INLINE void run(
00223 Index rows, Index cols,
00224 const Scalar* lhs, Index lhsStride,
00225 const Scalar* rhs, Index rhsStride,
00226 Scalar* res, Index resStride,
00227 Scalar alpha)
00228 {
00229 product_selfadjoint_matrix<Scalar, Index,
00230 EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
00231 RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
00232 EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
00233 LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
00234 ColMajor>
00235 ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
00236 }
00237 };
00238
00239 template <typename Scalar, typename Index,
00240 int LhsStorageOrder, bool ConjugateLhs,
00241 int RhsStorageOrder, bool ConjugateRhs>
00242 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
00243 {
00244
00245 static EIGEN_DONT_INLINE void run(
00246 Index rows, Index cols,
00247 const Scalar* _lhs, Index lhsStride,
00248 const Scalar* _rhs, Index rhsStride,
00249 Scalar* res, Index resStride,
00250 Scalar alpha)
00251 {
00252 Index size = rows;
00253
00254 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00255 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00256
00257 typedef gebp_traits<Scalar,Scalar> Traits;
00258
00259 Index kc = size;
00260 Index mc = rows;
00261 Index nc = cols;
00262 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
00263
00264 kc = (std::min)(kc,mc);
00265
00266 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00267 std::size_t sizeB = sizeW + kc*cols;
00268 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
00269 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
00270 Scalar* blockB = allocatedBlockB + sizeW;
00271
00272 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00273 symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00274 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00275 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
00276
00277 for(Index k2=0; k2<size; k2+=kc)
00278 {
00279 const Index actual_kc = (std::min)(k2+kc,size)-k2;
00280
00281
00282
00283
00284 pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
00285
00286
00287
00288
00289
00290 for(Index i2=0; i2<k2; i2+=mc)
00291 {
00292 const Index actual_mc = (std::min)(i2+mc,k2)-i2;
00293
00294 pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
00295
00296 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
00297 }
00298
00299 {
00300 const Index actual_mc = (std::min)(k2+kc,size)-k2;
00301
00302 pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
00303
00304 gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
00305 }
00306
00307 for(Index i2=k2+kc; i2<size; i2+=mc)
00308 {
00309 const Index actual_mc = (std::min)(i2+mc,size)-i2;
00310 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
00311 (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
00312
00313 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
00314 }
00315 }
00316 }
00317 };
00318
00319
00320 template <typename Scalar, typename Index,
00321 int LhsStorageOrder, bool ConjugateLhs,
00322 int RhsStorageOrder, bool ConjugateRhs>
00323 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
00324 {
00325
00326 static EIGEN_DONT_INLINE void run(
00327 Index rows, Index cols,
00328 const Scalar* _lhs, Index lhsStride,
00329 const Scalar* _rhs, Index rhsStride,
00330 Scalar* res, Index resStride,
00331 Scalar alpha)
00332 {
00333 Index size = cols;
00334
00335 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00336
00337 typedef gebp_traits<Scalar,Scalar> Traits;
00338
00339 Index kc = size;
00340 Index mc = rows;
00341 Index nc = cols;
00342 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
00343 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00344 std::size_t sizeB = sizeW + kc*cols;
00345 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
00346 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
00347 Scalar* blockB = allocatedBlockB + sizeW;
00348
00349 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00350 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00351 symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00352
00353 for(Index k2=0; k2<size; k2+=kc)
00354 {
00355 const Index actual_kc = (std::min)(k2+kc,size)-k2;
00356
00357 pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
00358
00359
00360 for(Index i2=0; i2<rows; i2+=mc)
00361 {
00362 const Index actual_mc = (std::min)(i2+mc,rows)-i2;
00363 pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
00364
00365 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
00366 }
00367 }
00368 }
00369 };
00370
00371 }
00372
00373
00374
00375
00376
00377 namespace internal {
00378 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
00379 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
00380 : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
00381 {};
00382 }
00383
00384 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
00385 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
00386 : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
00387 {
00388 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00389
00390 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00391
00392 enum {
00393 LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
00394 LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
00395 RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
00396 RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
00397 };
00398
00399 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
00400 {
00401 eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
00402
00403 const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
00404 const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
00405
00406 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00407 * RhsBlasTraits::extractScalarFactor(m_rhs);
00408
00409 internal::product_selfadjoint_matrix<Scalar, Index,
00410 EIGEN_LOGICAL_XOR(LhsIsUpper,
00411 internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
00412 NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
00413 EIGEN_LOGICAL_XOR(RhsIsUpper,
00414 internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
00415 NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
00416 internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
00417 ::run(
00418 lhs.rows(), rhs.cols(),
00419 &lhs.coeffRef(0,0), lhs.outerStride(),
00420 &rhs.coeffRef(0,0), rhs.outerStride(),
00421 &dst.coeffRef(0,0), dst.outerStride(),
00422 actualAlpha
00423 );
00424 }
00425 };
00426
00427 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H