TensorPadding.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H

namespace Eigen {

namespace internal {
template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};

template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

}  // end namespace internal


template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
        : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

    EIGEN_DEVICE_FUNC
    const PaddingDimensions& padding() const { return m_padding_dims; }
    EIGEN_DEVICE_FUNC
    Scalar padding_value() const { return m_padding_value; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PaddingDimensions m_padding_dims;
    const Scalar m_padding_value;
};

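// Usage sketch (illustrative, not part of the original header): a
// TensorPaddingOp is normally created through TensorBase::pad(), with one
// (before, after) pair per dimension and an optional padding value.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setConstant(1.0f);
//
//   Eigen::array<std::pair<Eigen::Index, Eigen::Index>, 2> paddings;
//   paddings[0] = std::make_pair(1, 2);  // 1 row before, 2 rows after dim 0
//   paddings[1] = std::make_pair(0, 1);  // 1 column after dim 1
//
//   Eigen::Tensor<float, 2> padded  = input.pad(paddings);         // 5 x 4, padded with 0
//   Eigen::Tensor<float, 2> padded2 = input.pad(paddings, -1.0f);  // custom padding value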

// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = true,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = true,
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
  {
    // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
    // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
    // of 1 element first and then pad.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute dimensions
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }
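
  // Worked example (illustrative, not part of the original source): for a
  // ColMajor 2x3 input padded by {1,2} on dim 0 and {0,1} on dim 1, the padded
  // dimensions are 5x4 and the loops above produce
  //   m_inputStrides  = [1, 2]      (strides of the 2x3 input)
  //   m_outputStrides = [1, 5, 20]  (strides of the 5x4 output; the extra last
  //                                  entry is the total output size)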

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
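
  // Index-mapping example (illustrative, not from the original source): with
  // the ColMajor 2x3 input padded to 5x4 as above, output index 6 is the
  // output coordinate (1, 1). For dim 1: idx = 6 / 5 = 1 is not padded, so
  // inputIndex += (1 - 0) * 2 and index becomes 6 - 5 = 1. For dim 0: 1 is not
  // below the left pad (1) nor at or beyond the right pad start (3), so
  // inputIndex += 1 - 1. The result is m_impl.coeff(2), i.e. input entry (0, 1).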

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                         desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;

    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to the desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;

    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }

    // Compute the offset in the input buffer. At this point it might be
    // invalid and point outside of the input buffer, because we don't check
    // for negative offsets; it will be corrected in the block iteration loop
    // below.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // The destination buffer and the scratch buffer are both indexed from 0
    // and have the same dimensions as the requested block (for the destination
    // buffer this property is guaranteed by `desc.destination()`).
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
    // dimensions, skipping the innermost dimension. In theory it should be
    // possible to squeeze matching innermost dimensions, however in practice
    // that did not show any improvements in benchmarks. Also in practice the
    // first outer dimension usually has padding, which will prevent squeezing.

    // Initialize the output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // We will fill an inner dimension of this size in the output. It might be
    // larger than the inner dimension in the input, so we might have to pad
    // before/after we copy values from the input inner dimension.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input inner
    // dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check: the sum of all sizes must be equal to the output size.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));

    // Keep track of current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
    // single logical inner dimension.

    // When possible we squeeze writes for the innermost (only if non-padded)
    // dimension with the first padded dimension. This lets us reduce the
    // number of calls to LinCopy and better utilize vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on a squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the input
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);
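
    // Example (illustrative, not from the original source): for a ColMajor
    // 4x6 input padded only on dim 1 by {1, 1} (output 4x8), a block that
    // spans the full inner dimension (size 4) satisfies squeeze_writes, so
    // runs of contiguous non-padded columns are copied with a single LinCopy
    // call each, up to squeeze_max_coord = min(8 - 1, block end) on dim 1.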

    // Iterate copying data from `m_impl.data()` to the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in the padded region (exclude innermost dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill single innermost dimension with padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);


      } else if (squeeze_writes) {
        // Squeeze multiple reads from innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from input to output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Update iteration state for only `squeeze_num - 1` processed inner
        // dimensions, because we have another iteration state update at the end
        // of the loop that will update iteration state for the last inner
        // processed dimension.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);

      } else {
        // Single read from innermost dimension.
        size += output_inner_dim_size;

        {  // Fill with padding before copying from input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Fill with padding after copying from input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }

      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        }
        it[j].count = 0;
        input_offset -= it[j].input_span;
        output_offset -= it[j].output_span;
        output_coord[dim] -= it[j].size - 1;
        output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0),
          size(0),
          input_stride(0),
          input_span(0),
          output_stride(0),
          output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }


  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                                 reduction * (2 * TensorOpCost::MulCost<Index>() +
                                              1 * TensorOpCost::DivCost<Index>()));
    }
  }

 protected:

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the 2 padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the 2 padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the 2 padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the 2 padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};




} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H