Redux.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen {

namespace internal {

// TODO
//  * implement other kinds of vectorization
//  * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

template<typename Func, typename Evaluator>
struct redux_traits
{
public:
  typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    InnerMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxColsAtCompileTime
                 : Evaluator::MaxRowsAtCompileTime,
    OuterMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxRowsAtCompileTime
                 : Evaluator::MaxColsAtCompileTime,
    SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
                        : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
                        : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
  };

  enum {
    MightVectorize = bool((int(Evaluator::Flags)&ActualPacketAccessBit)) && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
    MaySliceVectorize  = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
         : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 2 : 1)
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Evaluator::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(OuterMaxSize)
    EIGEN_DEBUG_VAR(SliceVectorizedWork)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

template<typename Func, typename Evaluator, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Evaluator::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
  {
    return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
                redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
  }
};

template<typename Func, typename Evaluator, int Start>
struct redux_novec_unroller<Func, Evaluator, Start, 1>
{
  enum {
    outer = Start / Evaluator::InnerSizeAtCompileTime,
    inner = Start % Evaluator::InnerSizeAtCompileTime
  };

  typedef typename Evaluator::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
  {
    return eval.coeffByOuterInner(outer, inner);
  }
};

// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining, even though
// for a 0-length reduction run() will never be called at all.
template<typename Func, typename Evaluator, int Start>
struct redux_novec_unroller<Func, Evaluator, Start, 0>
{
  typedef typename Evaluator::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
};

/*** vectorization ***/

template<typename Func, typename Evaluator, int Start, int Length>
struct redux_vec_unroller
{
  template<typename PacketType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
  {
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      HalfLength = Length/2
    };

    return func.packetOp(
            redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
            redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
  }
};

template<typename Func, typename Evaluator, int Start>
struct redux_vec_unroller<Func, Evaluator, Start, 1>
{
  template<typename PacketType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
  {
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      index = Start * PacketSize,
      outer = index / int(Evaluator::InnerSizeAtCompileTime),
      inner = index % int(Evaluator::InnerSizeAtCompileTime),
      alignment = Evaluator::Alignment
    };
    return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
  }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

template<typename Func, typename Evaluator,
         int Traversal = redux_traits<Func, Evaluator>::Traversal,
         int Unrolling = redux_traits<Func, Evaluator>::Unrolling
>
struct redux_impl;

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    Scalar res;
    res = eval.coeffByOuterInner(0, 0);
    for(Index i = 1; i < xpr.innerSize(); ++i)
      res = func(res, eval.coeffByOuterInner(0, i));
    for(Index i = 1; i < xpr.outerSize(); ++i)
      for(Index j = 0; j < xpr.innerSize(); ++j)
        res = func(res, eval.coeffByOuterInner(i, j));
    return res;
  }
};

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, DefaultTraversal, CompleteUnrolling>
  : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
{
  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
  typedef typename Evaluator::Scalar Scalar;
  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
  {
    return Base::run(eval,func);
  }
};

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename XprType>
  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    const Index size = xpr.size();

    const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(xpr);
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize  = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2  = alignedStart + alignedSize2;
    const Index alignedEnd   = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,eval.coeff(index));

      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,eval.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = eval.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,eval.coeff(index));
    }

    return res;
  }
};

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
template<typename Func, typename Evaluator, int Unrolling>
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    const Index innerSize = xpr.innerSize();
    const Index outerSize = xpr.outerSize();
    enum {
      packetSize = redux_traits<Func, Evaluator>::PacketSize
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, eval.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
    }

    return res;
  }
};

template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;

  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
  enum {
    PacketSize = redux_traits<Func, Evaluator>::PacketSize,
    Size = Evaluator::SizeAtCompileTime,
    VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
  };

  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
  {
    EIGEN_ONLY_USED_FOR_DEBUG(xpr)
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
      if (VectorizedSize != Size)
        res = func(res, redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
    }
  }
};

// evaluator adaptor
template<typename _XprType>
class redux_evaluator : public internal::evaluator<_XprType>
{
  typedef internal::evaluator<_XprType> Base;
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = Base::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

};

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


template<typename Derived>
template<typename Func>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  // The initial expression is passed to the reducer as an additional argument instead of
  // passing it as a member of redux_evaluator to help
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
}

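// --- Illustrative note (not part of Redux.h) --------------------------------
// redux() above is the single entry point all reductions funnel through: it
// wraps the expression in a redux_evaluator and lets redux_traits pick the
// traversal and unrolling strategy before dispatching to redux_impl. A minimal
// usage sketch, assuming Eigen 3.4 (redux accepts both functor objects and
// C++11 lambdas as the associative binary operator):
//
//   Eigen::Vector4d v(1.0, 2.0, 3.0, 4.0);
//   double s = v.redux([](double a, double b) { return a + b; });       // 10, same as v.sum()
//   double p = v.redux(Eigen::internal::scalar_product_op<double>());   // 24, same as v.prod()
// -----------------------------------------------------------------------------
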
template<typename Derived>
template<int NaNPropagation>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar,NaNPropagation>());
}

template<typename Derived>
template<int NaNPropagation>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar,NaNPropagation>());
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}

} // end namespace Eigen

#endif // EIGEN_REDUX_H
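
The public reductions defined in Part 4 are the usual entry points for users. Below is a minimal, self-contained sketch of how they behave, assuming Eigen 3.4; the NaN-propagating variants of minCoeff/maxCoeff take a NaNPropagation template argument (PropagateFast, PropagateNaN, or PropagateNumbers), and the generic redux() accepts any associative binary functor. This is illustrative usage only, not part of Redux.h.

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main()
{
  Eigen::Matrix2d m;
  m << 1.0, 2.0,
       3.0, 4.0;

  // The standard reductions all dispatch through DenseBase::redux().
  std::cout << m.sum()   << "\n";  // 10
  std::cout << m.prod()  << "\n";  // 24
  std::cout << m.mean()  << "\n";  // 2.5
  std::cout << m.trace() << "\n";  // 5 (sum of the diagonal)

  // minCoeff/maxCoeff use scalar_min_op/scalar_max_op; the template argument
  // selects the NaN policy (PropagateFast is the default).
  Eigen::Vector4d v(1.0, std::nan(""), 3.0, 4.0);
  std::cout << v.maxCoeff<Eigen::PropagateNumbers>() << "\n"; // 4 (NaN ignored)
  std::cout << v.maxCoeff<Eigen::PropagateNaN>()     << "\n"; // nan

  // A generic reduction with a user-supplied associative functor (here a lambda).
  double total = m.redux([](double a, double b) { return a + b; }); // same result as m.sum()
  std::cout << total << "\n";
  return 0;
}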