CoeffBasedProduct.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00005 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00006 //
00007 // This Source Code Form is subject to the terms of the Mozilla
00008 // Public License v. 2.0. If a copy of the MPL was not distributed
00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00010 
00011 #ifndef EIGEN_COEFFBASED_PRODUCT_H
00012 #define EIGEN_COEFFBASED_PRODUCT_H
00013 
00014 namespace Eigen { 
00015 
00016 namespace internal {
00017 
00018 /*********************************************************************************
00019 *  Coefficient based product implementation.
00020 *  It is designed for the following use cases:
00021 *  - small fixed sizes
00022 *  - lazy products
00023 *********************************************************************************/
00024 
00025 /* Since the all the dimensions of the product are small, here we can rely
00026  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
00027  *
00028  * Note that here the inner-loops should always be unrolled.
00029  */
00030 
00031 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00032 struct product_coeff_impl;
00033 
00034 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00035 struct product_packet_impl;
00036 
00037 template<typename LhsNested, typename RhsNested, int NestingFlags>
00038 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
00039 {
00040   typedef MatrixXpr XprKind;
00041   typedef typename remove_all<LhsNested>::type _LhsNested;
00042   typedef typename remove_all<RhsNested>::type _RhsNested;
00043   typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
00044   typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
00045                                            typename traits<_RhsNested>::StorageKind>::ret StorageKind;
00046   typedef typename promote_index_type<typename traits<_LhsNested>::Index,
00047                                          typename traits<_RhsNested>::Index>::type Index;
00048 
00049   enum {
00050       LhsCoeffReadCost = _LhsNested::CoeffReadCost,
00051       RhsCoeffReadCost = _RhsNested::CoeffReadCost,
00052       LhsFlags = _LhsNested::Flags,
00053       RhsFlags = _RhsNested::Flags,
00054 
00055       RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
00056       ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
00057       InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
00058 
00059       MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
00060       MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
00061 
00062       LhsRowMajor = LhsFlags & RowMajorBit,
00063       RhsRowMajor = RhsFlags & RowMajorBit,
00064 
00065       SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
00066 
00067       CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
00068                       && (ColsAtCompileTime == Dynamic
00069                           || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
00070                               && (RhsFlags&AlignedBit)
00071                              )
00072                          ),
00073 
00074       CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
00075                       && (RowsAtCompileTime == Dynamic
00076                           || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
00077                               && (LhsFlags&AlignedBit)
00078                              )
00079                          ),
00080 
00081       EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
00082                      : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
00083                      : (RhsRowMajor && !CanVectorizeLhs),
00084 
00085       Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
00086             | (EvalToRowMajor ? RowMajorBit : 0)
00087             | NestingFlags
00088             | (LhsFlags & RhsFlags & AlignedBit)
00089             // TODO enable vectorization for mixed types
00090             | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
00091 
00092       CoeffReadCost = InnerSize == Dynamic ? Dynamic
00093                     : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
00094                       + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
00095 
00096       /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
00097       * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
00098       * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
00099       * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
00100       */
00101       CanVectorizeInner =    SameType
00102                           && LhsRowMajor
00103                           && (!RhsRowMajor)
00104                           && (LhsFlags & RhsFlags & ActualPacketAccessBit)
00105                           && (LhsFlags & RhsFlags & AlignedBit)
00106                           && (InnerSize % packet_traits<Scalar>::size == 0)
00107     };
00108 };
00109 
00110 } // end namespace internal
00111 
00112 template<typename LhsNested, typename RhsNested, int NestingFlags>
00113 class CoeffBasedProduct
00114   : internal::no_assignment_operator,
00115     public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
00116 {
00117   public:
00118 
00119     typedef MatrixBase<CoeffBasedProduct> Base;
00120     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
00121     typedef typename Base::PlainObject PlainObject;
00122 
00123   private:
00124 
00125     typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
00126     typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
00127 
00128     enum {
00129       PacketSize = internal::packet_traits<Scalar>::size,
00130       InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
00131       Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
00132       CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
00133     };
00134 
00135     typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
00136                                    Unroll ? InnerSize-1 : Dynamic,
00137                                    _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
00138 
00139     typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
00140 
00141   public:
00142 
00143     inline CoeffBasedProduct(const CoeffBasedProduct& other)
00144       : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
00145     {}
00146 
00147     template<typename Lhs, typename Rhs>
00148     inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
00149       : m_lhs(lhs), m_rhs(rhs)
00150     {
00151       // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
00152       // We still allow to mix T and complex<T>.
00153       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
00154         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
00155       eigen_assert(lhs.cols() == rhs.rows()
00156         && "invalid matrix product"
00157         && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
00158     }
00159 
00160     EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
00161     EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
00162 
00163     EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
00164     {
00165       Scalar res;
00166       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00167       return res;
00168     }
00169 
00170     /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
00171      * which is why we don't set the LinearAccessBit.
00172      */
00173     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
00174     {
00175       Scalar res;
00176       const Index row = RowsAtCompileTime == 1 ? 0 : index;
00177       const Index col = RowsAtCompileTime == 1 ? index : 0;
00178       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00179       return res;
00180     }
00181 
00182     template<int LoadMode>
00183     EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
00184     {
00185       PacketScalar res;
00186       internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
00187                               Unroll ? InnerSize-1 : Dynamic,
00188                               _LhsNested, _RhsNested, PacketScalar, LoadMode>
00189         ::run(row, col, m_lhs, m_rhs, res);
00190       return res;
00191     }
00192 
00193     // Implicit conversion to the nested type (trigger the evaluation of the product)
00194     EIGEN_STRONG_INLINE operator const PlainObject& () const
00195     {
00196       m_result.lazyAssign(*this);
00197       return m_result;
00198     }
00199 
00200     const _LhsNested& lhs() const { return m_lhs; }
00201     const _RhsNested& rhs() const { return m_rhs; }
00202 
00203     const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
00204     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00205 
00206     template<int DiagonalIndex>
00207     const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
00208     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00209 
00210     const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
00211     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
00212 
00213   protected:
00214     typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
00215     typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
00216 
00217     mutable PlainObject m_result;
00218 };
00219 
00220 namespace internal {
00221 
00222 // here we need to overload the nested rule for products
00223 // such that the nested type is a const reference to a plain matrix
00224 template<typename Lhs, typename Rhs, int N, typename PlainObject>
00225 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
00226 {
00227   typedef PlainObject const& type;
00228 };
00229 
00230 /***************************************************************************
00231 * Normal product .coeff() implementation (with meta-unrolling)
00232 ***************************************************************************/
00233 
00234 /**************************************
00235 *** Scalar path  - no vectorization ***
00236 **************************************/
00237 
00238 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00239 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00240 {
00241   typedef typename Lhs::Index Index;
00242   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00243   {
00244     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
00245     res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
00246   }
00247 };
00248 
00249 template<typename Lhs, typename Rhs, typename RetScalar>
00250 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
00251 {
00252   typedef typename Lhs::Index Index;
00253   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00254   {
00255     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00256   }
00257 };
00258 
00259 template<typename Lhs, typename Rhs, typename RetScalar>
00260 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
00261 {
00262   typedef typename Lhs::Index Index;
00263   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
00264   {
00265     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00266     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00267       for(Index i = 1; i < lhs.cols(); ++i)
00268         res += lhs.coeff(row, i) * rhs.coeff(i, col);
00269   }
00270 };
00271 
00272 /*******************************************
00273 *** Scalar path with inner vectorization ***
00274 *******************************************/
00275 
00276 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
00277 struct product_coeff_vectorized_unroller
00278 {
00279   typedef typename Lhs::Index Index;
00280   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00281   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00282   {
00283     product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00284     pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
00285   }
00286 };
00287 
00288 template<typename Lhs, typename Rhs, typename Packet>
00289 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
00290 {
00291   typedef typename Lhs::Index Index;
00292   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00293   {
00294     pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
00295   }
00296 };
00297 
00298 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00299 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00300 {
00301   typedef typename Lhs::PacketScalar Packet;
00302   typedef typename Lhs::Index Index;
00303   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00304   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00305   {
00306     Packet pres;
00307     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00308     product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
00309     res = predux(pres);
00310   }
00311 };
00312 
00313 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
00314 struct product_coeff_vectorized_dyn_selector
00315 {
00316   typedef typename Lhs::Index Index;
00317   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00318   {
00319     res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
00320   }
00321 };
00322 
00323 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
00324 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
00325 template<typename Lhs, typename Rhs, int RhsCols>
00326 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
00327 {
00328   typedef typename Lhs::Index Index;
00329   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00330   {
00331     res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
00332   }
00333 };
00334 
00335 template<typename Lhs, typename Rhs, int LhsRows>
00336 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
00337 {
00338   typedef typename Lhs::Index Index;
00339   static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00340   {
00341     res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
00342   }
00343 };
00344 
00345 template<typename Lhs, typename Rhs>
00346 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
00347 {
00348   typedef typename Lhs::Index Index;
00349   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00350   {
00351     res = lhs.transpose().cwiseProduct(rhs).sum();
00352   }
00353 };
00354 
00355 template<typename Lhs, typename Rhs, typename RetScalar>
00356 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
00357 {
00358   typedef typename Lhs::Index Index;
00359   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00360   {
00361     product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
00362   }
00363 };
00364 
00365 /*******************
00366 *** Packet path  ***
00367 *******************/
00368 
00369 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00370 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00371 {
00372   typedef typename Lhs::Index Index;
00373   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00374   {
00375     product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00376     res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
00377   }
00378 };
00379 
00380 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00381 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00382 {
00383   typedef typename Lhs::Index Index;
00384   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00385   {
00386     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00387     res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
00388   }
00389 };
00390 
00391 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00392 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
00393 {
00394   typedef typename Lhs::Index Index;
00395   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00396   {
00397     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00398   }
00399 };
00400 
00401 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00402 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
00403 {
00404   typedef typename Lhs::Index Index;
00405   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00406   {
00407     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00408   }
00409 };
00410 
00411 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00412 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00413 {
00414   typedef typename Lhs::Index Index;
00415   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00416   {
00417     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00418     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00419       for(Index i = 1; i < lhs.cols(); ++i)
00420         res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
00421   }
00422 };
00423 
00424 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00425 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00426 {
00427   typedef typename Lhs::Index Index;
00428   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00429   {
00430     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00431     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00432       for(Index i = 1; i < lhs.cols(); ++i)
00433         res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
00434   }
00435 };
00436 
00437 } // end namespace internal
00438 
00439 } // end namespace Eigen
00440 
00441 #endif // EIGEN_COEFFBASED_PRODUCT_H


win_eigen
Author(s): Daniel Stonier
autogenerated on Mon Oct 6 2014 12:24:15