// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

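// Pack a selfadjoint block diagonal panel of the lhs for use with the gebp kernel:
// coefficients left of the diagonal block are copied as-is, the diagonal block is
// expanded symmetrically (with a real diagonal), and coefficients right of it are
// read from the transposed position and conjugated, so only one triangle of the
// underlying matrix is ever accessed.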
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy: the columns left of the diagonal block
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);                // normal

    // symmetric copy: the diagonal block itself
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));      // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);               // normal
      ++h;
    }

    // transposed copy: the columns right of the diagonal block
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }
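
  // Pack the whole rows x cols panel: full Pack1-row blocks first, then at most
  // one Pack2-row block, then the remaining rows one by one.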
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // do the same for the remaining rows, one row (mr==1) at a time
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);               // normal

      blockA[count++] = numext::real(lhs(i, i));   // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i)); // transposed
    }
  }
};

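// Pack a rows x cols block of a selfadjoint rhs. Columns entirely left of the
// current diagonal block are copied as-is, columns crossing it are expanded
// symmetrically, and columns entirely right of it are read from the transposed
// position and conjugated, so again only one triangle of the matrix is accessed.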
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: the columns entirely left of the diagonal block => normal copy
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: the columns crossing the diagonal block
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically in three different parts (transposed, symmetric, normal)
      // transposed copy above the diagonal block
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }

      // symmetric copy of the diagonal block itself
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal copy below the diagonal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = numext::real(rhs(k,k));

        // transposed copy above the diagonal
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = numext::conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal copy below the diagonal block
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: the columns entirely right of the diagonal block => transposed copy
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (i.e. the same with nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transposed copy
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal copy
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
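  // A row-major destination is handled by computing the transposed product
  // (lhs*rhs)^T = rhs^T * lhs^T: the two operands are swapped and their storage
  // orders and conjugation flags adjusted, falling back to the col-major case.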
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res,       Index resStride,
    const Scalar& alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};

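// Optimized version for: selfadjoint (lhs) * general matrix (rhs) product,
// col-major destination.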
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    const Scalar& alpha);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
  Index rows, Index cols,
  const Scalar* _lhs, Index lhsStride,
  const Scalar* _rhs, Index rhsStride,
  Scalar* res,        Index resStride,
  const Scalar& alpha)
{
  Index size = rows;

  const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
  const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

  typedef gebp_traits<Scalar,Scalar> Traits;

  Index kc = size;  // cache block size along the K direction
  Index mc = rows;  // cache block size along the M direction
  Index nc = cols;  // cache block size along the N direction
  computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);

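  // Ensure kc <= mc: at the diagonal step below, a square actual_kc x actual_kc
  // block is packed into blockA, whose capacity is only kc*mc scalars.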
  kc = (std::min)(kc,mc);

  std::size_t sizeW = kc*Traits::WorkSpaceFactor;
  std::size_t sizeB = sizeW + kc*cols;
  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
  ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
  // the first sizeW scalars of allocatedBlockB serve as workspace for the kernel,
  // the packed rhs starts right after them
  Scalar* blockB = allocatedBlockB + sizeW;

  gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
  symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
  gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
  gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

  for(Index k2=0; k2<size; k2+=kc)
  {
    const Index actual_kc = (std::min)(k2+kc,size)-k2;

    // we have selected one row panel of rhs and one column panel of lhs;
    // pack rhs's panel into a sequential chunk of memory,
    // expanding each coefficient to a constant packet for further reuse
    pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

    // the selected lhs's panel has to be split in three different parts:
    //  1 - the transposed panel above the diagonal block => transposed packed copy
    //  2 - the diagonal block => special packed copy
    //  3 - the panel below the diagonal block => generic packed copy
    for(Index i2=0; i2<k2; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,k2)-i2;
      // transposed packed copy
      pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

      gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }
    // the diagonal block => special packed copy
    {
      const Index actual_mc = (std::min)(k2+kc,size)-k2;
      // symmetric packed copy
      pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

      gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }

    // the panel below the diagonal block => generic packed copy
    for(Index i2=k2+kc; i2<size; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,size)-i2;
      gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
        (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

      gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }
  }
}

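// Optimized version for: general matrix (lhs) * selfadjoint (rhs) product,
// col-major destination.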
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    const Scalar& alpha);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
  Index rows, Index cols,
  const Scalar* _lhs, Index lhsStride,
  const Scalar* _rhs, Index rhsStride,
  Scalar* res,        Index resStride,
  const Scalar& alpha)
{
  Index size = cols;

  // only the lhs needs a data mapper here: symm_pack_rhs works on the raw pointer
  const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

  typedef gebp_traits<Scalar,Scalar> Traits;

  Index kc = size;  // cache block size along the K direction
  Index mc = rows;  // cache block size along the M direction
  Index nc = cols;  // cache block size along the N direction
  computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
  std::size_t sizeW = kc*Traits::WorkSpaceFactor;
  std::size_t sizeB = sizeW + kc*cols;
  ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
  ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
  Scalar* blockB = allocatedBlockB + sizeW;

  gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
  gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
  symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

  for(Index k2=0; k2<size; k2+=kc)
  {
    const Index actual_kc = (std::min)(k2+kc,size)-k2;

    // pack the rhs panel, expanding the selfadjoint structure on the fly
    pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

    // from here on, proceed exactly as in a general matrix matrix product
    for(Index i2=0; i2<rows; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,rows)-i2;
      pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

      gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
    }
  }
}

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // alpha
      );
  }
};
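
// For reference, this kernel is normally reached through the high-level API
// rather than by instantiating product_selfadjoint_matrix directly. A minimal
// sketch (A, B, C are hypothetical user matrices):
//
//   Eigen::MatrixXd A(n,n), B(n,m), C(n,m);
//   C.noalias() = A.selfadjointView<Eigen::Lower>() * B;
//
// Only the lower triangle of A is read; the expression builds a
// SelfadjointProductMatrix whose scaleAndAddTo() dispatches to the
// product_selfadjoint_matrix specializations above.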

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H