CoeffBasedProduct.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00005 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00006 //
00007 // Eigen is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU Lesser General Public
00009 // License as published by the Free Software Foundation; either
00010 // version 3 of the License, or (at your option) any later version.
00011 //
00012 // Alternatively, you can redistribute it and/or
00013 // modify it under the terms of the GNU General Public License as
00014 // published by the Free Software Foundation; either version 2 of
00015 // the License, or (at your option) any later version.
00016 //
00017 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00018 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00019 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00020 // GNU General Public License for more details.
00021 //
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License and a copy of the GNU General Public License along with
00024 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00025 
00026 #ifndef EIGEN_COEFFBASED_PRODUCT_H
00027 #define EIGEN_COEFFBASED_PRODUCT_H
00028 
00029 namespace internal {
00030 
00031 /*********************************************************************************
00032 *  Coefficient based product implementation.
00033 *  It is designed for the following use cases:
00034 *  - small fixed sizes
00035 *  - lazy products
00036 *********************************************************************************/
00037 
00038 /* Since the all the dimensions of the product are small, here we can rely
00039  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
00040  *
00041  * Note that here the inner-loops should always be unrolled.
00042  */
00043 
00044 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00045 struct product_coeff_impl;
00046 
00047 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00048 struct product_packet_impl;
00049 
00050 template<typename LhsNested, typename RhsNested, int NestingFlags>
00051 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
00052 {
00053   typedef MatrixXpr XprKind;
00054   typedef typename remove_all<LhsNested>::type _LhsNested;
00055   typedef typename remove_all<RhsNested>::type _RhsNested;
00056   typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
00057   typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
00058                                            typename traits<_RhsNested>::StorageKind>::ret StorageKind;
00059   typedef typename promote_index_type<typename traits<_LhsNested>::Index,
00060                                          typename traits<_RhsNested>::Index>::type Index;
00061 
00062   enum {
00063       LhsCoeffReadCost = _LhsNested::CoeffReadCost,
00064       RhsCoeffReadCost = _RhsNested::CoeffReadCost,
00065       LhsFlags = _LhsNested::Flags,
00066       RhsFlags = _RhsNested::Flags,
00067 
00068       RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
00069       ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
00070       InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
00071 
00072       MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
00073       MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
00074 
00075       LhsRowMajor = LhsFlags & RowMajorBit,
00076       RhsRowMajor = RhsFlags & RowMajorBit,
00077 
00078       SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
00079 
00080       CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
00081                       && (ColsAtCompileTime == Dynamic
00082                           || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
00083                               && (RhsFlags&AlignedBit)
00084                              )
00085                          ),
00086 
00087       CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
00088                       && (RowsAtCompileTime == Dynamic
00089                           || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
00090                               && (LhsFlags&AlignedBit)
00091                              )
00092                          ),
00093 
00094       EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
00095                      : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
00096                      : (RhsRowMajor && !CanVectorizeLhs),
00097 
00098       Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
00099             | (EvalToRowMajor ? RowMajorBit : 0)
00100             | NestingFlags
00101             | (LhsFlags & RhsFlags & AlignedBit)
00102             // TODO enable vectorization for mixed types
00103             | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
00104 
00105       CoeffReadCost = InnerSize == Dynamic ? Dynamic
00106                     : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
00107                       + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
00108 
00109       /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
00110       * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
00111       * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
00112       * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
00113       */
00114       CanVectorizeInner =    SameType
00115                           && LhsRowMajor
00116                           && (!RhsRowMajor)
00117                           && (LhsFlags & RhsFlags & ActualPacketAccessBit)
00118                           && (LhsFlags & RhsFlags & AlignedBit)
00119                           && (InnerSize % packet_traits<Scalar>::size == 0)
00120     };
00121 };
00122 
00123 } // end namespace internal
00124 
00125 template<typename LhsNested, typename RhsNested, int NestingFlags>
00126 class CoeffBasedProduct
00127   : internal::no_assignment_operator,
00128     public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
00129 {
00130   public:
00131 
00132     typedef MatrixBase<CoeffBasedProduct> Base;
00133     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
00134     typedef typename Base::PlainObject PlainObject;
00135 
00136   private:
00137 
00138     typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
00139     typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
00140 
00141     enum {
00142       PacketSize = internal::packet_traits<Scalar>::size,
00143       InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
00144       Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
00145       CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
00146     };
00147 
00148     typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
00149                                    Unroll ? InnerSize-1 : Dynamic,
00150                                    _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
00151 
00152     typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
00153 
00154   public:
00155 
00156     inline CoeffBasedProduct(const CoeffBasedProduct& other)
00157       : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
00158     {}
00159 
00160     template<typename Lhs, typename Rhs>
00161     inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
00162       : m_lhs(lhs), m_rhs(rhs)
00163     {
00164       // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
00165       // We still allow to mix T and complex<T>.
00166       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
00167         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
00168       eigen_assert(lhs.cols() == rhs.rows()
00169         && "invalid matrix product"
00170         && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
00171     }
00172 
00173     EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
00174     EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
00175 
00176     EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
00177     {
00178       Scalar res;
00179       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00180       return res;
00181     }
00182 
00183     /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
00184      * which is why we don't set the LinearAccessBit.
00185      */
00186     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
00187     {
00188       Scalar res;
00189       const Index row = RowsAtCompileTime == 1 ? 0 : index;
00190       const Index col = RowsAtCompileTime == 1 ? index : 0;
00191       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00192       return res;
00193     }
00194 
00195     template<int LoadMode>
00196     EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
00197     {
00198       PacketScalar res;
00199       internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
00200                               Unroll ? InnerSize-1 : Dynamic,
00201                               _LhsNested, _RhsNested, PacketScalar, LoadMode>
00202         ::run(row, col, m_lhs, m_rhs, res);
00203       return res;
00204     }
00205 
00206     // Implicit conversion to the nested type (trigger the evaluation of the product)
00207     EIGEN_STRONG_INLINE operator const PlainObject& () const
00208     {
00209       m_result.lazyAssign(*this);
00210       return m_result;
00211     }
00212 
00213     const _LhsNested& lhs() const { return m_lhs; }
00214     const _RhsNested& rhs() const { return m_rhs; }
00215 
00216     const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
00217     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00218 
00219     template<int DiagonalIndex>
00220     const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
00221     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00222 
00223     const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
00224     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
00225 
00226   protected:
00227     const LhsNested m_lhs;
00228     const RhsNested m_rhs;
00229 
00230     mutable PlainObject m_result;
00231 };
00232 
00233 namespace internal {
00234 
00235 // here we need to overload the nested rule for products
00236 // such that the nested type is a const reference to a plain matrix
00237 template<typename Lhs, typename Rhs, int N, typename PlainObject>
00238 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
00239 {
00240   typedef PlainObject const& type;
00241 };
00242 
00243 /***************************************************************************
00244 * Normal product .coeff() implementation (with meta-unrolling)
00245 ***************************************************************************/
00246 
00247 /**************************************
00248 *** Scalar path  - no vectorization ***
00249 **************************************/
00250 
00251 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00252 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00253 {
00254   typedef typename Lhs::Index Index;
00255   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00256   {
00257     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
00258     res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
00259   }
00260 };
00261 
00262 template<typename Lhs, typename Rhs, typename RetScalar>
00263 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
00264 {
00265   typedef typename Lhs::Index Index;
00266   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00267   {
00268     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00269   }
00270 };
00271 
00272 template<typename Lhs, typename Rhs, typename RetScalar>
00273 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
00274 {
00275   typedef typename Lhs::Index Index;
00276   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
00277   {
00278     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00279     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00280       for(Index i = 1; i < lhs.cols(); ++i)
00281         res += lhs.coeff(row, i) * rhs.coeff(i, col);
00282   }
00283 };
00284 
00285 /*******************************************
00286 *** Scalar path with inner vectorization ***
00287 *******************************************/
00288 
00289 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
00290 struct product_coeff_vectorized_unroller
00291 {
00292   typedef typename Lhs::Index Index;
00293   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00294   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00295   {
00296     product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00297     pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
00298   }
00299 };
00300 
00301 template<typename Lhs, typename Rhs, typename Packet>
00302 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
00303 {
00304   typedef typename Lhs::Index Index;
00305   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00306   {
00307     pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
00308   }
00309 };
00310 
00311 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00312 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00313 {
00314   typedef typename Lhs::PacketScalar Packet;
00315   typedef typename Lhs::Index Index;
00316   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00317   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00318   {
00319     Packet pres;
00320     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00321     product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
00322     res = predux(pres);
00323   }
00324 };
00325 
00326 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
00327 struct product_coeff_vectorized_dyn_selector
00328 {
00329   typedef typename Lhs::Index Index;
00330   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00331   {
00332     res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
00333   }
00334 };
00335 
00336 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
00337 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
00338 template<typename Lhs, typename Rhs, int RhsCols>
00339 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
00340 {
00341   typedef typename Lhs::Index Index;
00342   EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00343   {
00344     res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
00345   }
00346 };
00347 
00348 template<typename Lhs, typename Rhs, int LhsRows>
00349 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
00350 {
00351   typedef typename Lhs::Index Index;
00352   EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00353   {
00354     res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
00355   }
00356 };
00357 
00358 template<typename Lhs, typename Rhs>
00359 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
00360 {
00361   typedef typename Lhs::Index Index;
00362   EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00363   {
00364     res = lhs.transpose().cwiseProduct(rhs).sum();
00365   }
00366 };
00367 
00368 template<typename Lhs, typename Rhs, typename RetScalar>
00369 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
00370 {
00371   typedef typename Lhs::Index Index;
00372   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00373   {
00374     product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
00375   }
00376 };
00377 
00378 /*******************
00379 *** Packet path  ***
00380 *******************/
00381 
00382 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00383 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00384 {
00385   typedef typename Lhs::Index Index;
00386   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00387   {
00388     product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00389     res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
00390   }
00391 };
00392 
00393 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00394 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00395 {
00396   typedef typename Lhs::Index Index;
00397   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00398   {
00399     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00400     res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
00401   }
00402 };
00403 
00404 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00405 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
00406 {
00407   typedef typename Lhs::Index Index;
00408   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00409   {
00410     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00411   }
00412 };
00413 
00414 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00415 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
00416 {
00417   typedef typename Lhs::Index Index;
00418   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00419   {
00420     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00421   }
00422 };
00423 
00424 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00425 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00426 {
00427   typedef typename Lhs::Index Index;
00428   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00429   {
00430     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00431     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00432       for(Index i = 1; i < lhs.cols(); ++i)
00433         res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
00434   }
00435 };
00436 
00437 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00438 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00439 {
00440   typedef typename Lhs::Index Index;
00441   EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00442   {
00443     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00444     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00445       for(Index i = 1; i < lhs.cols(); ++i)
00446         res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
00447   }
00448 };
00449 
00450 } // end namespace internal
00451 
00452 #endif // EIGEN_COEFFBASED_PRODUCT_H


re_vision
Author(s): Dorian Galvez-Lopez
autogenerated on Sun Jan 5 2014 11:30:56