#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H

namespace Eigen {
namespace internal {

template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};
template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

}  // end namespace internal
template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
        : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

    EIGEN_DEVICE_FUNC
    const PaddingDimensions& padding() const { return m_padding_dims; }
    EIGEN_DEVICE_FUNC
    Scalar padding_value() const { return m_padding_value; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PaddingDimensions m_padding_dims;
    const Scalar m_padding_value;
};
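// A minimal usage sketch (not part of the original header): the padding
// expression is normally created through TensorBase::pad(). The tensor and
// padding names below are illustrative only.
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setRandom();
//
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(0, 1);   // pad 0 before, 1 after dim 0
//   paddings[1] = std::make_pair(2, 3);   // pad 2 before, 3 after dim 1
//
//   Eigen::Tensor<float, 2> padded  = input.pad(paddings);          // pads with 0
//   Eigen::Tensor<float, 2> padded2 = input.pad(paddings, -1.0f);   // custom value
//
// The result has dimensions (2+0+1) x (3+2+3) = 3 x 8, with the input
// coefficients shifted by the "before" padding on each dimension.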
// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  enum {
    IsAligned         = true,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = true,
    RawAccess         = false
  };

  // Tensor block evaluation support.
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
  {
    // Compute dimensions
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }
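  // Worked example of the stride setup above (illustrative, not in the
  // original source): for a column-major 2x3 input with paddings
  // {(1,1), (2,0)} the padded dimensions are m_dimensions = {4, 5}, and
  //   m_inputStrides  = {1, 2}       (strides of the 2x3 input),
  //   m_outputStrides = {1, 4, 20}   (strides of the 4x5 output, plus total size).
  // An output coordinate (i0, i1) maps to linear index i0 + 4*i1, and the
  // extra m_outputStrides[NumDims] entry holds the total output size.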
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
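  // Worked example of the coeff() index mapping above (illustrative): with a
  // column-major 2x3 input padded by {(1,1), (2,0)} to 4x5, output index 9
  // decomposes as idx = 9 / m_outputStrides[1] = 2 for dim 1 with remainder 1
  // for dim 0. Coordinate 2 on dim 1 is not padding (2 >= 2 and 2 < 5), and
  // coordinate 1 on dim 0 is not padding (1 >= 1 and 1 < 3), so the result is
  // m_impl.coeff((2-2)*2 + (1-1)) = m_impl.coeff(0). Output index 0, in
  // contrast, hits the left padding on dim 1 and returns m_paddingValue.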
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                         desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
    // Compute offsets in the output tensor corresponding to the desc.offset().
    Index offset = desc.offset();
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;
    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }
    // Compute the offset in the input buffer. At this point it might point
    // outside of the input buffer (negative offsets caused by padding are
    // corrected in the block iteration loop below).
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // Destination buffer and scratch buffer are both indexed from 0.
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // Initialize the output block iterator state, skipping the innermost
    // dimension (dimensions are stored in innermost-to-outermost order).
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);
    // Total size of the block requested by the descriptor.
    const Index output_size = desc.size();

    // Inner dimension size to fill in the output; it can be larger than the
    // input inner dimension, in which case part of it is padding.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input
    // inner dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from the input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from the input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input
    // inner dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // The three parts must add up to the inner dimension size.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));
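    // Illustrative decomposition (not in the original source): if the block
    // inner dimension covers 10 output coefficients, the input inner
    // dimension holds 6 values, and input_offsets[inner_dim_idx] == -2
    // (2 elements of left padding), then pad_before = 2, copy = 6 and
    // pad_after = 2, which indeed sums to the 10 output coefficients.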
    // Keep track of the current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    // When possible, squeeze writes for the innermost (non-padded) dimension
    // together with the first padded dimension. This reduces the number of
    // calls to LinCopy and makes better use of vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and is equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on the squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the squeeze dim
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer in the squeeze dim
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);
    // Iterate copying data from `m_impl.data()` to the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in the padded region (excluding the innermost dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill the padded region with the padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);
      } else if (squeeze_writes) {
        // Squeeze multiple reads from innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from the input to the output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Update the iteration state for `squeeze_num - 1` processed inner
        // dimensions; the last one is updated at the end of the loop body.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);
      } else {
        size += output_inner_dim_size;

        {  // Fill with padding before copying from the input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Fill with padding after copying from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }
      // Advance the output block iterator state to the next coordinate.
      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        } else {
          it[j].count = 0;
          input_offset -= it[j].input_span;
          output_offset -= it[j].output_span;
          output_coord[dim] -= it[j].size - 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
        }
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0), size(0), input_stride(0), input_span(0),
          output_stride(0), output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }
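  // Example (illustrative): with m_padding[d] = (2, 3) and a padded size
  // m_dimensions[d] = 10, isPaddingAtIndexForDim(i, d) is true for i in
  // [0, 2) (left padding) and for i in [7, 10) (right padding), and false for
  // the interior range [2, 7). When PaddingDimensions is a compile-time
  // IndexList, a pad size that is statically known to be zero lets the
  // corresponding comparison be dropped entirely.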
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }
  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                    reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                    reduction * (2 * TensorOpCost::MulCost<Index>() +
                                 1 * TensorOpCost::DivCost<Index>()));
    }
  }
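  // Illustrative cost example: padding a dimension from in = 6 to out = 8
  // gives reduction = 6/8 = 0.75, so the wrapped evaluator's per-coefficient
  // cost is scaled by 0.75 (only 75% of output coefficients touch the input),
  // and a small index-computation cost (adds for the first dimension, plus
  // muls and divs for the remaining ones) is added per output coefficient.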
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // Every other case: the packet straddles a padding boundary.
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }
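  // Packet-path example (illustrative): with PacketSize = 4 and the first
  // dimension padded as (1, 1) to a padded size of 6, a packet starting at
  // output coordinate 1 covers coordinates 1..4, which lie entirely between
  // the two padding zones, so it is loaded directly from the input. A packet
  // starting at coordinate 2 covers 2..5 and straddles the right padding, so
  // it falls back to packetWithPossibleZero(), which gathers the coefficients
  // one by one via coeff().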
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // Every other case: the packet straddles a padding boundary.
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }
  Dimensions m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H