SelfadjointMatrixVector.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
00011 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
00012 
00013 namespace Eigen { 
00014 
00015 namespace internal {
00016 
00017 /* Optimized selfadjoint matrix * vector product:
00018  * This algorithm processes 2 columns at onces that allows to both reduce
00019  * the number of load/stores of the result by a factor 2 and to reduce
00020  * the instruction dependency.
00021  */
00022 
00023 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
00024 struct selfadjoint_matrix_vector_product;
00025 
00026 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
00027 struct selfadjoint_matrix_vector_product
00028 
00029 {
00030 static EIGEN_DONT_INLINE void run(
00031   Index size,
00032   const Scalar*  lhs, Index lhsStride,
00033   const Scalar* _rhs, Index rhsIncr,
00034   Scalar* res,
00035   Scalar alpha)
00036 {
00037   typedef typename packet_traits<Scalar>::type Packet;
00038   typedef typename NumTraits<Scalar>::Real RealScalar;
00039   const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
00040 
00041   enum {
00042     IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
00043     IsLower = UpLo == Lower ? 1 : 0,
00044     FirstTriangular = IsRowMajor == IsLower
00045   };
00046 
00047   conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;
00048   conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
00049   conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
00050 
00051   conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;
00052   conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
00053 
00054   Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
00055 
00056   // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
00057   // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
00058   // this is because we need to extract packets
00059   ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);  
00060   if (rhsIncr!=1)
00061   {
00062     const Scalar* it = _rhs;
00063     for (Index i=0; i<size; ++i, it+=rhsIncr)
00064       rhs[i] = *it;
00065   }
00066 
00067   Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
00068   if (FirstTriangular)
00069     bound = size - bound;
00070 
00071   for (Index j=FirstTriangular ? bound : 0;
00072        j<(FirstTriangular ? size : bound);j+=2)
00073   {
00074     register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
00075     register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
00076 
00077     Scalar t0 = cjAlpha * rhs[j];
00078     Packet ptmp0 = pset1<Packet>(t0);
00079     Scalar t1 = cjAlpha * rhs[j+1];
00080     Packet ptmp1 = pset1<Packet>(t1);
00081 
00082     Scalar t2(0);
00083     Packet ptmp2 = pset1<Packet>(t2);
00084     Scalar t3(0);
00085     Packet ptmp3 = pset1<Packet>(t3);
00086 
00087     size_t starti = FirstTriangular ? 0 : j+2;
00088     size_t endi   = FirstTriangular ? j : size;
00089     size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
00090     size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
00091 
00092     // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
00093     res[j]   += cjd.pmul(internal::real(A0[j]), t0);
00094     res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
00095     if(FirstTriangular)
00096     {
00097       res[j]   += cj0.pmul(A1[j],   t1);
00098       t3       += cj1.pmul(A1[j],   rhs[j]);
00099     }
00100     else
00101     {
00102       res[j+1] += cj0.pmul(A0[j+1],t0);
00103       t2 += cj1.pmul(A0[j+1], rhs[j+1]);
00104     }
00105 
00106     for (size_t i=starti; i<alignedStart; ++i)
00107     {
00108       res[i] += t0 * A0[i] + t1 * A1[i];
00109       t2 += conj(A0[i]) * rhs[i];
00110       t3 += conj(A1[i]) * rhs[i];
00111     }
00112     // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
00113     // gcc 4.2 does this optimization automatically.
00114     const Scalar* EIGEN_RESTRICT a0It  = A0  + alignedStart;
00115     const Scalar* EIGEN_RESTRICT a1It  = A1  + alignedStart;
00116     const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
00117           Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
00118     for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
00119     {
00120       Packet A0i = ploadu<Packet>(a0It);  a0It  += PacketSize;
00121       Packet A1i = ploadu<Packet>(a1It);  a1It  += PacketSize;
00122       Packet Bi  = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
00123       Packet Xi  = pload <Packet>(resIt);
00124 
00125       Xi    = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
00126       ptmp2 = pcj1.pmadd(A0i,  Bi, ptmp2);
00127       ptmp3 = pcj1.pmadd(A1i,  Bi, ptmp3);
00128       pstore(resIt,Xi); resIt += PacketSize;
00129     }
00130     for (size_t i=alignedEnd; i<endi; i++)
00131     {
00132       res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
00133       t2 += cj1.pmul(A0[i], rhs[i]);
00134       t3 += cj1.pmul(A1[i], rhs[i]);
00135     }
00136 
00137     res[j]   += alpha * (t2 + predux(ptmp2));
00138     res[j+1] += alpha * (t3 + predux(ptmp3));
00139   }
00140   for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
00141   {
00142     register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
00143 
00144     Scalar t1 = cjAlpha * rhs[j];
00145     Scalar t2(0);
00146     // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
00147     res[j] += cjd.pmul(internal::real(A0[j]), t1);
00148     for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
00149     {
00150       res[i] += cj0.pmul(A0[i], t1);
00151       t2 += cj1.pmul(A0[i], rhs[i]);
00152     }
00153     res[j] += alpha * t2;
00154   }
00155 }
00156 };
00157 
00158 } // end namespace internal 
00159 
00160 /***************************************************************************
00161 * Wrapper to product_selfadjoint_vector
00162 ***************************************************************************/
00163 
00164 namespace internal {
00165 template<typename Lhs, int LhsMode, typename Rhs>
00166 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
00167   : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
00168 {};
00169 }
00170 
00171 template<typename Lhs, int LhsMode, typename Rhs>
00172 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
00173   : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
00174 {
00175   EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00176 
00177   enum {
00178     LhsUpLo = LhsMode&(Upper|Lower)
00179   };
00180 
00181   SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00182 
00183   template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
00184   {
00185     typedef typename Dest::Scalar ResScalar;
00186     typedef typename Base::RhsScalar RhsScalar;
00187     typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
00188     
00189     eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
00190 
00191     typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
00192     typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
00193 
00194     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00195                                * RhsBlasTraits::extractScalarFactor(m_rhs);
00196 
00197     enum {
00198       EvalToDest = (Dest::InnerStrideAtCompileTime==1),
00199       UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
00200     };
00201     
00202     internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
00203     internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
00204 
00205     ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
00206                                                   EvalToDest ? dest.data() : static_dest.data());
00207                                                   
00208     ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
00209         UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
00210     
00211     if(!EvalToDest)
00212     {
00213       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00214       int size = dest.size();
00215       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00216       #endif
00217       MappedDest(actualDestPtr, dest.size()) = dest;
00218     }
00219       
00220     if(!UseRhs)
00221     {
00222       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00223       int size = rhs.size();
00224       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00225       #endif
00226       Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
00227     }
00228       
00229       
00230     internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
00231       (
00232         lhs.rows(),                             // size
00233         &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
00234         actualRhsPtr, 1,                        // rhs info
00235         actualDestPtr,                          // result info
00236         actualAlpha                             // scale factor
00237       );
00238     
00239     if(!EvalToDest)
00240       dest = MappedDest(actualDestPtr, dest.size());
00241   }
00242 };
00243 
00244 namespace internal {
00245 template<typename Lhs, typename Rhs, int RhsMode>
00246 struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
00247   : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
00248 {};
00249 }
00250 
00251 template<typename Lhs, typename Rhs, int RhsMode>
00252 struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
00253   : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
00254 {
00255   EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00256 
00257   enum {
00258     RhsUpLo = RhsMode&(Upper|Lower)
00259   };
00260 
00261   SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00262 
00263   template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
00264   {
00265     // let's simply transpose the product
00266     Transpose<Dest> destT(dest);
00267     SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
00268                              Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
00269   }
00270 };
00271 
00272 } // end namespace Eigen
00273 
00274 #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H


win_eigen
Author(s): Daniel Stonier
autogenerated on Mon Oct 6 2014 12:25:52