TensorReverse.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
namespace Eigen {

namespace internal {
template<typename ReverseDimensions, typename XprType>
struct traits<TensorReverseOp<ReverseDimensions,
                              XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename ReverseDimensions, typename XprType>
struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
};

template<typename ReverseDimensions, typename XprType>
struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
              typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
{
  typedef TensorReverseOp<ReverseDimensions, XprType> type;
};

}  // end namespace internal

template<typename ReverseDimensions, typename XprType>
class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
                                          XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
        const XprType& expr, const ReverseDimensions& reverse_dims)
        : m_xpr(expr), m_reverse_dims(reverse_dims) { }

    EIGEN_DEVICE_FUNC
    const ReverseDimensions& reverse() const { return m_reverse_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)

  protected:
    typename XprType::Nested m_xpr;
    const ReverseDimensions m_reverse_dims;
};
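// A minimal usage sketch (not part of the original header; variable names are
// illustrative). TensorReverseOp is normally created through
// TensorBase::reverse() rather than constructed directly:
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setRandom();
//   Eigen::array<bool, 2> rev = {{true, false}};  // reverse the first dimension only
//   Eigen::Tensor<float, 2> out = input.reverse(rev);
//   // out(i, j) == input(input.dimension(0) - 1 - i, j)
//
// Because the op derives from TensorBase<..., WriteAccessors>, a reversed view
// can also appear on the left-hand side, e.g. input.reverse(rev) = out;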

// Eval as rvalue
template<typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
{
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = NumDims > 0,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_reverse(op.reverse()),
        m_device(device)
  {
    // Reversing a scalar isn't supported yet. It would be a no-op anyway.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute strides
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }
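  // Illustration of the stride layout computed above (worked example, not from
  // the original source): for a rank-3 tensor with dimensions (2, 3, 4),
  // ColMajor yields m_strides = {1, 2, 6} and RowMajor yields m_strides = {12, 4, 1}.
  // m_fastStrides caches a TensorIntDivisor per stride so that the divisions in
  // reverseIndex() below avoid hardware integer division.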

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
      Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[0]) {
        inputIndex += (m_dimensions[0] - index - 1);
      } else {
        inputIndex += index;
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[NumDims-1]) {
        inputIndex += (m_dimensions[NumDims-1] - index - 1);
      } else {
        inputIndex += index;
      }
    }
    return inputIndex;
  }
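  // Worked example for the mapping above (illustrative, not from the original
  // source): ColMajor, m_dimensions = (2, 3), m_reverse = {true, false}, so
  // m_strides = {1, 2}. Output linear indices 0..5 map to input indices
  // 1, 0, 3, 2, 5, 4, i.e. output(i, j) reads input(1 - i, j): only the first
  // (reversed) dimension is flipped.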

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
      Index index) const {
    return m_impl.coeff(reverseIndex(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // TODO(ndjaitly): write a better packing routine that uses
    // local structure.
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
                                                            values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    // Block evaluation reads underlying memory in reverse order, and default
    // cost model does not properly catch this in bytes stored/loaded.
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
               target_size)
        .addCostPerCoeff({0, 0, 24});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // TODO(ezhulenev): If underlying tensor expression supports and prefers
    // block evaluation we must use it. Currently we use coeff and packet
    // access into the underlying tensor expression.
    // static const bool useBlockAccessForArgType =
    //     TensorEvaluator<ArgType, Device>::BlockAccess &&
    //     TensorEvaluator<ArgType, Device>::PreferBlockAccess;

    static const bool isColMajor =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
    const bool inner_dim_reversed = m_reverse[inner_dim_idx];

    // Offset in the output block.
    Index block_offset = 0;

    // Offset in the input Tensor.
    Index input_offset = reverseIndex(desc.offset());

    // Initialize output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = isColMajor ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].count = 0;
      it[i].reverse = m_reverse[dim];

      it[i].block_stride =
          i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
      it[i].block_span = it[i].block_stride * (it[i].size - 1);

      it[i].input_stride = m_strides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      if (it[i].reverse) {
        it[i].input_stride = -1 * it[i].input_stride;
        it[i].input_span = -1 * it[i].input_span;
      }
    }

    // If multiple inner dimensions have the same reverse flag, check if we can
    // merge them into a single virtual inner dimension.
    int effective_inner_dim = 0;
    for (int i = 1; i < NumDims; ++i) {
      if (it[i].reverse != it[effective_inner_dim].reverse) break;
      if (it[i].block_stride != it[effective_inner_dim].size) break;
      if (it[i].block_stride != numext::abs(it[i].input_stride)) break;

      it[i].size = it[effective_inner_dim].size * it[i].size;

      it[i].block_stride = 1;
      it[i].input_stride = (inner_dim_reversed ? -1 : 1);

      it[i].block_span = it[i].block_stride * (it[i].size - 1);
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      effective_inner_dim = i;
    }

    eigen_assert(it[effective_inner_dim].block_stride == 1);
    eigen_assert(it[effective_inner_dim].input_stride ==
                 (inner_dim_reversed ? -1 : 1));

    const Index inner_dim_size = it[effective_inner_dim].size;

    // Prepare storage for the materialized reverse result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      // Copy inner-most dimension data from reversed location in input.
      Index dst = block_offset;
      Index src = input_offset;

      // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
      // worse results in benchmarks than a simple coefficient loop.
      if (inner_dim_reversed) {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          --src;
        }
      } else {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          ++src;
        }
      }

      // For the 1d tensor we need to generate only one inner-most dimension.
      if ((NumDims - effective_inner_dim) == 1) break;

      // Update offset.
      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          block_offset += it[i].block_stride;
          input_offset += it[i].input_stride;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        block_offset -= it[i].block_span;
        input_offset -= it[i].input_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                     2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      if (m_reverse[i]) {
        compute_cost += 2 * TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fastStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  ReverseDimensions m_reverse;
  const Device EIGEN_DEVICE_REF m_device;

 private:
  struct BlockIteratorState {
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockIteratorState()
        : size(0),
          count(0),
          reverse(false),
          block_stride(0),
          block_span(0),
          input_stride(0),
          input_span(0) {}

    Index size;
    Index count;
    bool reverse;
    Index block_stride;
    Index block_span;
    Index input_stride;
    Index input_span;
  };
};

// Eval as lvalue

template <typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                             Device> {
  typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                          Device> Base;
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    return this->m_impl.coeffRef(this->reverseIndex(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // This code is pilfered from TensorMorphing.h
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index+i) = values[i];
    }
  }
};


}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H