00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef EIGEN_COEFFBASED_PRODUCT_H
00012 #define EIGEN_COEFFBASED_PRODUCT_H
00013
00014 namespace Eigen {
00015
00016 namespace internal {
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00032 struct product_coeff_impl;
00033
00034 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00035 struct product_packet_impl;
00036
00037 template<typename LhsNested, typename RhsNested, int NestingFlags>
00038 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
00039 {
00040 typedef MatrixXpr XprKind;
00041 typedef typename remove_all<LhsNested>::type _LhsNested;
00042 typedef typename remove_all<RhsNested>::type _RhsNested;
00043 typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
00044 typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
00045 typename traits<_RhsNested>::StorageKind>::ret StorageKind;
00046 typedef typename promote_index_type<typename traits<_LhsNested>::Index,
00047 typename traits<_RhsNested>::Index>::type Index;
00048
00049 enum {
00050 LhsCoeffReadCost = _LhsNested::CoeffReadCost,
00051 RhsCoeffReadCost = _RhsNested::CoeffReadCost,
00052 LhsFlags = _LhsNested::Flags,
00053 RhsFlags = _RhsNested::Flags,
00054
00055 RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
00056 ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
00057 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
00058
00059 MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
00060 MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
00061
00062 LhsRowMajor = LhsFlags & RowMajorBit,
00063 RhsRowMajor = RhsFlags & RowMajorBit,
00064
00065 SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
00066
00067 CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
00068 && (ColsAtCompileTime == Dynamic
00069 || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
00070 && (RhsFlags&AlignedBit)
00071 )
00072 ),
00073
00074 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
00075 && (RowsAtCompileTime == Dynamic
00076 || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
00077 && (LhsFlags&AlignedBit)
00078 )
00079 ),
00080
00081 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
00082 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
00083 : (RhsRowMajor && !CanVectorizeLhs),
00084
00085 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
00086 | (EvalToRowMajor ? RowMajorBit : 0)
00087 | NestingFlags
00088 | (LhsFlags & RhsFlags & AlignedBit)
00089
00090 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
00091
00092 CoeffReadCost = InnerSize == Dynamic ? Dynamic
00093 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
00094 + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
00095
00096
00097
00098
00099
00100
00101 CanVectorizeInner = SameType
00102 && LhsRowMajor
00103 && (!RhsRowMajor)
00104 && (LhsFlags & RhsFlags & ActualPacketAccessBit)
00105 && (LhsFlags & RhsFlags & AlignedBit)
00106 && (InnerSize % packet_traits<Scalar>::size == 0)
00107 };
00108 };
00109
00110 }
00111
00112 template<typename LhsNested, typename RhsNested, int NestingFlags>
00113 class CoeffBasedProduct
00114 : internal::no_assignment_operator,
00115 public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
00116 {
00117 public:
00118
00119 typedef MatrixBase<CoeffBasedProduct> Base;
00120 EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
00121 typedef typename Base::PlainObject PlainObject;
00122
00123 private:
00124
00125 typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
00126 typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
00127
00128 enum {
00129 PacketSize = internal::packet_traits<Scalar>::size,
00130 InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
00131 Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
00132 CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
00133 };
00134
00135 typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
00136 Unroll ? InnerSize-1 : Dynamic,
00137 _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
00138
00139 typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
00140
00141 public:
00142
00143 inline CoeffBasedProduct(const CoeffBasedProduct& other)
00144 : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
00145 {}
00146
00147 template<typename Lhs, typename Rhs>
00148 inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
00149 : m_lhs(lhs), m_rhs(rhs)
00150 {
00151
00152
00153 EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
00154 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
00155 eigen_assert(lhs.cols() == rhs.rows()
00156 && "invalid matrix product"
00157 && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
00158 }
00159
00160 EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
00161 EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
00162
00163 EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
00164 {
00165 Scalar res;
00166 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00167 return res;
00168 }
00169
00170
00171
00172
00173 EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
00174 {
00175 Scalar res;
00176 const Index row = RowsAtCompileTime == 1 ? 0 : index;
00177 const Index col = RowsAtCompileTime == 1 ? index : 0;
00178 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00179 return res;
00180 }
00181
00182 template<int LoadMode>
00183 EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
00184 {
00185 PacketScalar res;
00186 internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
00187 Unroll ? InnerSize-1 : Dynamic,
00188 _LhsNested, _RhsNested, PacketScalar, LoadMode>
00189 ::run(row, col, m_lhs, m_rhs, res);
00190 return res;
00191 }
00192
00193
00194 EIGEN_STRONG_INLINE operator const PlainObject& () const
00195 {
00196 m_result.lazyAssign(*this);
00197 return m_result;
00198 }
00199
00200 const _LhsNested& lhs() const { return m_lhs; }
00201 const _RhsNested& rhs() const { return m_rhs; }
00202
00203 const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
00204 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00205
00206 template<int DiagonalIndex>
00207 const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
00208 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00209
00210 const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
00211 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
00212
00213 protected:
00214 typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
00215 typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
00216
00217 mutable PlainObject m_result;
00218 };
00219
00220 namespace internal {
00221
00222
00223
00224 template<typename Lhs, typename Rhs, int N, typename PlainObject>
00225 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
00226 {
00227 typedef PlainObject const& type;
00228 };
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00239 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00240 {
00241 typedef typename Lhs::Index Index;
00242 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00243 {
00244 product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
00245 res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
00246 }
00247 };
00248
00249 template<typename Lhs, typename Rhs, typename RetScalar>
00250 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
00251 {
00252 typedef typename Lhs::Index Index;
00253 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00254 {
00255 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00256 }
00257 };
00258
00259 template<typename Lhs, typename Rhs, typename RetScalar>
00260 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
00261 {
00262 typedef typename Lhs::Index Index;
00263 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
00264 {
00265 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00266 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00267 for(Index i = 1; i < lhs.cols(); ++i)
00268 res += lhs.coeff(row, i) * rhs.coeff(i, col);
00269 }
00270 };
00271
00272
00273
00274
00275
00276 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
00277 struct product_coeff_vectorized_unroller
00278 {
00279 typedef typename Lhs::Index Index;
00280 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00281 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00282 {
00283 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00284 pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
00285 }
00286 };
00287
00288 template<typename Lhs, typename Rhs, typename Packet>
00289 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
00290 {
00291 typedef typename Lhs::Index Index;
00292 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00293 {
00294 pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
00295 }
00296 };
00297
00298 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00299 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00300 {
00301 typedef typename Lhs::PacketScalar Packet;
00302 typedef typename Lhs::Index Index;
00303 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00304 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00305 {
00306 Packet pres;
00307 product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00308 product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
00309 res = predux(pres);
00310 }
00311 };
00312
00313 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
00314 struct product_coeff_vectorized_dyn_selector
00315 {
00316 typedef typename Lhs::Index Index;
00317 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00318 {
00319 res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
00320 }
00321 };
00322
00323
00324
00325 template<typename Lhs, typename Rhs, int RhsCols>
00326 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
00327 {
00328 typedef typename Lhs::Index Index;
00329 static EIGEN_STRONG_INLINE void run(Index , Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00330 {
00331 res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
00332 }
00333 };
00334
00335 template<typename Lhs, typename Rhs, int LhsRows>
00336 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
00337 {
00338 typedef typename Lhs::Index Index;
00339 static EIGEN_STRONG_INLINE void run(Index row, Index , const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00340 {
00341 res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
00342 }
00343 };
00344
00345 template<typename Lhs, typename Rhs>
00346 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
00347 {
00348 typedef typename Lhs::Index Index;
00349 static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00350 {
00351 res = lhs.transpose().cwiseProduct(rhs).sum();
00352 }
00353 };
00354
00355 template<typename Lhs, typename Rhs, typename RetScalar>
00356 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
00357 {
00358 typedef typename Lhs::Index Index;
00359 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00360 {
00361 product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
00362 }
00363 };
00364
00365
00366
00367
00368
00369 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00370 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00371 {
00372 typedef typename Lhs::Index Index;
00373 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00374 {
00375 product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00376 res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
00377 }
00378 };
00379
00380 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00381 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00382 {
00383 typedef typename Lhs::Index Index;
00384 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00385 {
00386 product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00387 res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
00388 }
00389 };
00390
00391 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00392 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
00393 {
00394 typedef typename Lhs::Index Index;
00395 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00396 {
00397 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00398 }
00399 };
00400
00401 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00402 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
00403 {
00404 typedef typename Lhs::Index Index;
00405 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00406 {
00407 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00408 }
00409 };
00410
00411 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00412 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00413 {
00414 typedef typename Lhs::Index Index;
00415 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00416 {
00417 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00418 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00419 for(Index i = 1; i < lhs.cols(); ++i)
00420 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
00421 }
00422 };
00423
00424 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00425 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00426 {
00427 typedef typename Lhs::Index Index;
00428 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00429 {
00430 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00431 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00432 for(Index i = 1; i < lhs.cols(); ++i)
00433 res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
00434 }
00435 };
00436
00437 }
00438
00439 }
00440
00441 #endif // EIGEN_COEFFBASED_PRODUCT_H