Redux.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen {

namespace internal {

// TODO
//  * implement other kind of vectorization
//  * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

template<typename Func, typename Derived>
struct redux_traits
{
public:
  typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
    MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};
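
// Illustrative note: for a small fixed-size expression such as Matrix4f reduced with
// scalar_sum_op, packet access and LinearAccessBit are available, so Traversal evaluates
// to LinearVectorizedTraversal, and the compile-time Cost (16 coefficient reads plus 15
// additions with unit costs) stays below UnrollingLimit, giving CompleteUnrolling.
// A dynamic-size expression has Cost = HugeCost and therefore always gets NoUnrolling.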

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

template<typename Func, typename Derived, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
};

template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
  enum {
    outer = Start / Derived::InnerSizeAtCompileTime,
    inner = Start % Derived::InnerSizeAtCompileTime
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
};

// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};
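
// Illustrative expansion: for Length==4 and Start==0 the recursion above evaluates
// roughly as
//   func( func(coeff(0), coeff(1)), func(coeff(2), coeff(3)) )
// i.e. a balanced tree of applications rather than a left-to-right fold, which keeps
// the dependency chain of the generated code short.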

/*** vectorization ***/

template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
  }
};

template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
  enum {
    index = Start * redux_traits<Func, Derived>::PacketSize,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
    alignment = Derived::Alignment
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
};
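
// Illustrative note: in the vectorized unroller Start counts whole packets, so the
// Length==1 case above loads the packet starting at coefficient Start * PacketSize.
// Packets are combined with func.packetOp() in the same balanced-tree fashion, and the
// caller performs the final horizontal reduction to a scalar with func.predux().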

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
    res = mat.coeffByOuterInner(0, 0);
    for(Index i = 1; i < mat.innerSize(); ++i)
      res = func(res, mat.coeffByOuterInner(0, i));
    for(Index i = 1; i < mat.outerSize(); ++i)
      for(Index j = 0; j < mat.innerSize(); ++j)
        res = func(res, mat.coeffByOuterInner(i, j));
    return res;
  }
};
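
// Illustrative note: the scalar fallback above walks the expression in storage order via
// coeffByOuterInner, i.e. down each column for a column-major expression (or along each
// row for a row-major one), so the plain loop still visits memory contiguously whenever
// the expression maps to actual storage.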

template<typename Func, typename Derived>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();

    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize  = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2  = alignedStart + alignedSize2;
    const Index alignedEnd   = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};
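
// Illustrative summary: the code above reduces the aligned middle range with packets,
// using two accumulators (packet_res0/packet_res1) to hide the latency of func.packetOp,
// then folds the unaligned prologue [0, alignedStart) and the epilogue [alignedEnd, size)
// coefficient by coefficient after the final func.predux().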

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};
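
// Illustrative note: SliceVectorizedTraversal is chosen for expressions that expose
// packet access per inner slice but not linear addressing, e.g. a block of a larger
// matrix; each outer slice (column or row) is vectorized over its first
// packetedInnerSize coefficients and the remaining tail is handled scalar-wise.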

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;

  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func,Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func,Derived, 0, Size>::run(mat,func);
    }
  }
};
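
// Illustrative note: with complete unrolling, the first VectorizedSize coefficients are
// folded at compile time by redux_vec_unroller and reduced to a scalar via func.predux();
// the remaining Size - VectorizedSize tail coefficients, if any, are folded
// scalar-by-scalar through redux_novec_unroller.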

// evaluator adaptor
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  const XprType & nestedExpression() const { return m_xpr; }

protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
  * an associative operator. Both current C++98 and C++11 functor styles are handled.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
template<typename Derived>
template<typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}
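
// Usage sketch (illustrative only): redux() accepts any associative binary functor,
// which is exactly how the convenience reductions below are implemented, e.g.
//   Eigen::Matrix3f m = Eigen::Matrix3f::Random();
//   float s  = m.redux(Eigen::internal::scalar_sum_op<float,float>());  // same as m.sum()
//   float mx = m.redux(Eigen::internal::scalar_max_op<float,float>());  // same as m.maxCoeff()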

/** \returns the minimum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
}

/** \returns the maximum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
}

/** \returns the sum of all coefficients of \c *this
  *
  * If \c *this is empty, then the value 0 is returned.
  *
  * \sa trace(), prod(), mean()
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

/** \returns the mean of all coefficients of \c *this
  *
  * \sa trace(), prod(), sum()
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

/** \returns the product of all coefficients of \c *this
  *
  * If \c *this is empty, then the value 1 is returned.
  *
  * \sa sum(), mean(), trace()
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
  *
  * \c *this can be any matrix, not necessarily square.
  *
  * \sa diagonal(), sum()
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}

} // end namespace Eigen

#endif // EIGEN_REDUX_H