GeneralProduct.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H

namespace Eigen {

enum {
  Large = 2,
  Small = 3
};

namespace internal {

template<int Rows, int Cols, int Depth> struct product_type_selector;

template<int Size, int MaxSize> struct product_size_category
{
  enum { is_large = MaxSize == Dynamic ||
                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
                    (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
         value = is_large  ? Large
               : Size == 1 ? 1
                           : Small
  };
};
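
// Editorial illustration (not part of the original source): assuming the default
// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD of 20, the classification above yields e.g.:
//   product_size_category<4,4>::value             == Small   (small fixed size)
//   product_size_category<1,1>::value             == 1       (degenerate dimension)
//   product_size_category<Dynamic,Dynamic>::value == Large   (unknown, potentially big size)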

template<typename Lhs, typename Rhs> struct product_type
{
  typedef typename remove_all<Lhs>::type _Lhs;
  typedef typename remove_all<Rhs>::type _Rhs;
  enum {
    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
    Rows    = traits<_Lhs>::RowsAtCompileTime,
    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
    Cols    = traits<_Rhs>::ColsAtCompileTime,
    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
                                           traits<_Rhs>::MaxRowsAtCompileTime),
    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
                                        traits<_Rhs>::RowsAtCompileTime)
  };

  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
  // is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
  enum {
    rows_select  = product_size_category<Rows,MaxRows>::value,
    cols_select  = product_size_category<Cols,MaxCols>::value,
    depth_select = product_size_category<Depth,MaxDepth>::value
  };
  typedef product_type_selector<rows_select, cols_select, depth_select> selector;

public:
  enum {
    value = selector::ret,
    ret = selector::ret
  };
#ifdef EIGEN_DEBUG_PRODUCT
  static void debug()
  {
    EIGEN_DEBUG_VAR(Rows);
    EIGEN_DEBUG_VAR(Cols);
    EIGEN_DEBUG_VAR(Depth);
    EIGEN_DEBUG_VAR(rows_select);
    EIGEN_DEBUG_VAR(cols_select);
    EIGEN_DEBUG_VAR(depth_select);
    EIGEN_DEBUG_VAR(value);
  }
#endif
};

/* The following allows selecting the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
template<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
template<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
template<int Depth>     struct product_type_selector<1, 1, Depth>        { enum { ret = InnerProduct }; };
template<>              struct product_type_selector<1, 1, 1>            { enum { ret = InnerProduct }; };
template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
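
// Editorial illustration (not part of the original source): two concrete resolutions of
// the table above, using plain dense types such as Matrix4f and MatrixXf:
//   product_type<Matrix4f,Matrix4f>::value  -> <Small,Small,Small> -> CoeffBasedProductMode
//     (small fixed-size products are evaluated coefficient-wise, no GEMM kernel involved)
//   product_type<MatrixXf,MatrixXf>::value  -> <Large,Large,Large> -> GemmProduct
//     (dynamic sizes fall back to the cache-friendly blocked GEMM kernel)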

} // end namespace internal

/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/

// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up as a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
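
// For illustration (editorial note): with column vectors u and v, u.transpose()*v
// currently yields a 1x1 matrix rather than a Scalar:
//   Vector3f u, v;
//   Matrix<float,1,1> s = u.transpose()*v;  // 1x1 matrix result
//   float t = u.dot(v);                     // returns a Scalar directly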

/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/

/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/

/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
 * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
 * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
 * 3 - all other cases are handled using a simple loop along the outer-storage direction.
 * Therefore we need a lower level meta selector.
 * Furthermore, if the matrix is the rhs, then the product has to be transposed.
 */
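
// Editorial sketch of the dispatch described above (hypothetical user code):
//   MatrixXf A;  VectorXf x, y;
//   y.noalias() = A * x;               // col-major, BLAS compatible
//                                      //   -> gemv_dense_selector<OnTheRight,ColMajor,true>
//   y.noalias() = A.transpose() * x;   // row-major view -> <OnTheRight,RowMajor,true>
//   y.transpose() = x.transpose() * A; // vector on the left -> <OnTheLeft,...>, which
//                                      //   transposes both sides and re-dispatches OnTheRight
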
namespace internal {

template<int Side, int StorageOrder, bool BlasCompatible>
struct gemv_dense_selector;

} // end namespace internal

namespace internal {

template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;

template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
  EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};

template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
};

template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
  enum {
    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
    PacketSize      = internal::packet_traits<Scalar>::size
  };
  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
  #else
  // Some architectures cannot align on the stack,
  // => let's manually enforce alignment by allocating more data and returning the address of the first aligned element.
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    return ForceAlignment
            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
            : m_data.array;
  }
  #endif
};
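
// Editorial note on the manual alignment above: rounding the address down to the previous
// alignment boundary and then adding EIGEN_MAX_ALIGN_BYTES always lands on the *next*
// boundary, which is why the array is over-allocated by EIGEN_MAX_ALIGN_BYTES extra Scalar
// slots. A worked example, assuming EIGEN_MAX_ALIGN_BYTES == 16:
//   m_data.array at 0x1007:  (0x1007 & ~0xF) + 0x10 = 0x1000 + 0x10 = 0x1010
//   m_data.array at 0x1000:  (0x1000 & ~0xF) + 0x10 = 0x1010   (already aligned, still skips ahead)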

// The vector is on the left => transposition
template<int StorageOrder, bool BlasCompatible>
struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    Transpose<Dest> destT(dest);
    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
  }
};

template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar   LhsScalar;
    typedef typename Rhs::Scalar   RhsScalar;
    typedef typename Dest::Scalar  ResScalar;
    typedef typename Dest::RealScalar  RealScalar;

    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef internal::blas_traits<Rhs> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;

    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;

    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
                                  * RhsBlasTraits::extractScalarFactor(rhs);

    // make sure Dest is a compile-time vector type (bug 1166)
    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on the other hand, it is good for the cache to pack the vector anyway...
      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
      MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
    };

    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

    if(!MightCannotUseDest)
    {
      // shortcut if we are sure to be able to use dest directly,
      // this makes it easier for the compiler to generate cleaner and more optimized code for most common cases
      general_matrix_vector_product
          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
          actualLhs.rows(), actualLhs.cols(),
          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
          dest.data(), 1,
          compatibleAlpha);
    }
    else
    {
      gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

      const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;

      ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
                                                    evalToDest ? dest.data() : static_dest.data());

      if(!evalToDest)
      {
        #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
        Index size = dest.size();
        EIGEN_DENSE_STORAGE_CTOR_PLUGIN
        #endif
        if(!alphaIsCompatible)
        {
          MappedDest(actualDestPtr, dest.size()).setZero();
          compatibleAlpha = RhsScalar(1);
        }
        else
          MappedDest(actualDestPtr, dest.size()) = dest;
      }

      general_matrix_vector_product
          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
          actualLhs.rows(), actualLhs.cols(),
          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
          actualDestPtr, 1,
          compatibleAlpha);

      if (!evalToDest)
      {
        if(!alphaIsCompatible)
          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
        else
          dest = MappedDest(actualDestPtr, dest.size());
      }
    }
  }
};
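
// Editorial note: the MightCannotUseDest path above is taken, e.g., when the destination has
// a non-unit inner stride, such as a row of a column-major matrix, or when a complex lhs is
// combined with a real rhs. A sketch of the first case (hypothetical user code):
//   MatrixXf A(3,3), B(3,3); VectorXf x(3);
//   B.row(1).transpose().noalias() = A * x;  // dest inner stride is 3 at run time, so the
//                                            // kernel writes into a packed temporary and the
//                                            // result is copied back into B afterwards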

template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar   LhsScalar;
    typedef typename Rhs::Scalar   RhsScalar;
    typedef typename Dest::Scalar  ResScalar;

    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef internal::blas_traits<Rhs> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;

    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
                                  * RhsBlasTraits::extractScalarFactor(rhs);

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on the other hand, it is good for the cache to pack the vector anyway...
      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
    };

    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());

    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      Index size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }

    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
    general_matrix_vector_product
        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
        RhsMapper(actualRhsPtr, 1),
        dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
        actualAlpha);
  }
};

template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
    const Index size = rhs.rows();
    for(Index k=0; k<size; ++k)
      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
  }
};

template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
    const Index rows = dest.rows();
    for(Index i=0; i<rows; ++i)
      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
  }
};

} // end namespace internal

/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/

/** \returns the matrix product of \c *this and \a other.
  *
  * \note If instead of the matrix product you want the coefficient-wise product, see cwiseProduct().
  *
  * \sa lazyProduct(), operator*=(const MatrixBase&), cwiseProduct()
  */
#ifndef __CUDACC__

template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
  // A note regarding the function declaration: In MSVC, this function will sometimes
  // not be inlined since DenseStorage is an unwindable object for dynamic
  // matrices and product types are holding a member to store the result.
  // Thus it does not help to tag this function with EIGEN_STRONG_INLINE.
  enum {
    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
                   || OtherDerived::RowsAtCompileTime==Dynamic
                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };
  // note to the lost user:
  //    * for a dot product use: v1.dot(v2)
  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived,OtherDerived>::debug();
#endif

  return Product<Derived, OtherDerived>(derived(), other.derived());
}

#endif // __CUDACC__
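
// Editorial sketch of what the static assertions above accept and reject (hypothetical user code):
//   Matrix3f A, B;  Vector3f u, v;
//   Matrix3f C = A * B;              // ok: 3x3 times 3x3
//   Vector3f w = A * v;              // ok: 3x3 times 3x1
//   // u * v;                        // rejected: INVALID_VECTOR_VECTOR_PRODUCT...;
//   //                               //   use u.dot(v) or u.cwiseProduct(v) instead
//   Matrix<float,1,1> s = u.transpose() * v;  // a genuine 1x3 times 3x1 product is fine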

/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product will behave like any other expression: the coefficients of the product will be
  * computed one at a time, as requested. This might be useful in some extremely rare cases when only
  * a small and non-coherent fraction of the result's coefficients has to be computed.
  *
  * \warning This version of the matrix product can be much slower. So use it only if you know
  * what you are doing and you have measured a true speed improvement.
  *
  * \sa operator*(const MatrixBase&)
  */
template<typename Derived>
template<typename OtherDerived>
const Product<Derived,OtherDerived,LazyProduct>
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{
  enum {
    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
                   || OtherDerived::RowsAtCompileTime==Dynamic
                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
  };
  // note to the lost user:
  //    * for a dot product use: v1.dot(v2)
  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)

  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
}
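
// Editorial sketch contrasting the two entry points above (hypothetical user code):
//   MatrixXf A(64,64), B(64,64), C(64,64);
//   C.noalias() = A * B;               // operator*: dispatched via internal::product_type,
//                                      //   here resolving to GemmProduct
//   float c01 = A.lazyProduct(B)(0,1); // lazyProduct: coefficient-based expression; only the
//                                      //   requested coefficients are computed on demand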

} // end namespace Eigen

#endif // EIGEN_GENERAL_PRODUCT_H