#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
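
// Usage sketch: a TensorPaddingOp is normally created through TensorBase::pad().
// The concrete sizes and values below are only illustrative.
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setConstant(1.0f);
//
//   // One element of padding before and after each dimension.
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(1, 1);
//   paddings[1] = std::make_pair(1, 1);
//
//   Eigen::Tensor<float, 2> padded = input.pad(paddings);         // 4 x 5, zero-padded
//   Eigen::Tensor<float, 2> padded2 = input.pad(paddings, -1.0f); // explicit padding value
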
template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  // ...
  typedef typename XprType::Nested Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  // ...
};

template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  // ...
};

template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1,
              typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  // ...
};

template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  // ...
};

// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  // ...

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()),
        m_device(device)
  {
    // Compute the output dimensions: each dimension grows by the amount of
    // padding added before and after it.
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
 
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
 
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }
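
  // Illustrative stride layout (sizes assumed for clarity): for a column-major
  // 2x3 input padded by one element on each side of both dimensions, the
  // constructor above produces a 4x5 output with
  //   m_inputStrides  = {1, 2}
  //   m_outputStrides = {1, 4, 20}
  // The extra trailing entry of m_outputStrides holds the total output size.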
 
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      // Peel off one output dimension at a time, from outermost to innermost.
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
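
  // Worked example for coeff() above (column-major, sizes assumed): with a 2x3
  // input padded by (1,1) in both dimensions, the output is 4x5 and
  // m_outputStrides = {1, 4, 20}. For output index 8, i.e. coordinate (0, 2):
  //   i = 1: idx = 8 / 4 = 2 is not padding; inputIndex += (2 - 1) * 2; index -= 8.
  //   i = 0: index = 0 < m_padding[0].first = 1, so m_paddingValue is returned.
  // Only indices whose coordinates all lie in the non-padded region reach
  // m_impl.coeff(inputIndex).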
 
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }
 
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
 
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If the requested block is empty, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                         desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
 
    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;
 
    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }
 
    // Compute the linear offset into the input buffer. At this point it may
    // point outside the input (offsets are not clamped for padded regions);
    // it is corrected in the block iteration loop below.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }
 
    // The output (block) buffer is indexed from 0 and has the same dimensions
    // as the requested block.
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // Initialize the block iterator state for the NumDims - 1 outer dimensions
    // (the innermost dimension is handled separately below). Dimensions in
    // this array are always in innermost-to-outermost (column-major) order.
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;

      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // The inner dimension of the output block might be larger than the inner
    // dimension of the input, so it may have to be padded before and/or after
    // the values copied from the input inner dimension.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input
    // inner dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values can actually be copied from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from the input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from the input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check: the three parts must add up to the output inner dimension.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));
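
    // Illustrative split (values assumed): for an input inner dimension of
    // size 2 padded by (1, 1), a block covering the whole padded inner
    // dimension has output_inner_dim_size = 4 and an inner input offset of -1,
    // giving pad_before = 1, copy = 2 and pad_after = 1.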
 
    // Keep track of the current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // When possible, squeeze writes for the innermost dimension (only if it is
    // not padded) together with the first padded dimension. This reduces the
    // number of calls to LinCopy and makes better use of vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and is equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on the squeeze dimension that can be written to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the squeeze dim
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer squeeze dim
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);
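
    // Example of the squeeze bound (values assumed): if the squeeze dimension
    // has padded size 10 with right padding 2, and the block starts at offset 3
    // and spans 7 elements of that dimension, then
    //   squeeze_max_coord = min(10 - 2, 3 + 7) = 8,
    // so coordinates [3, 8) can be written with a single squeezed copy.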
 
    // Iterate, copying data from m_impl.data() into the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in a padded region (excluding the inner dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill the whole inner dimension with the padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);
 
      } else if (squeeze_writes) {
        // Squeeze multiple reads from the innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from the input to the output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Update the iteration state for `squeeze_num - 1` processed inner
        // dimensions; the last one is handled by the regular state update at
        // the end of the loop.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);
 
      } else {
        // Interleave padding and copying within the inner dimension.
        size += output_inner_dim_size;

        {  // Pad before copying data from the input inner dimension.
          const Index out = output_offset;
          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Pad after copying data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;
          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }
 
      // Advance the block iterator state (odometer-style) to the next run of
      // the inner dimension.
      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        }

        it[j].count = 0;
        input_offset -= it[j].input_span;
        output_offset -= it[j].output_span;
        output_coord[dim] -= it[j].size - 1;
        output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
 
#ifdef EIGEN_USE_SYCL
  // ...
#endif

 private:
  // Per-dimension iteration state used by block() above.
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0), size(0), input_stride(0), input_span(0),
          output_stride(0), output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };
 
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }
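
  // For example (sizes assumed): with m_dimensions[d] = 4 and m_padding[d] = (1, 1),
  // indices 0 and 3 are padding in dimension d, while indices 1 and 2 map to
  // input indices 0 and 1.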
 
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }
 
  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                    reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                    reduction * (2 * TensorOpCost::MulCost<Index>() +
                                 1 * TensorOpCost::DivCost<Index>()));
    }
  }
 
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    const Index initialIndex = index;
    Index inputIndex = 0;
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // All the coefficients are in the left padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // All the coefficients are in the right padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // All the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // The packet straddles a padding boundary: fall back to scalar loads.
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index firstIdx = index;
    const Index lastIdx = index + PacketSize - 1;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // All the coefficients are in the left padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // All the coefficients are in the right padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // All the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case: fall back to scalar loads.
    return packetWithPossibleZero(initialIndex);
  }
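
  // Example for packetColMajor() above (values assumed): for the 4x5
  // column-major output of a 2x3 input padded by (1,1), with PacketSize = 4
  // and index = 4 (the start of the second output column), the outer dimension
  // is stripped in the vectorized path, but the remaining column
  // [pad, in(0,0), in(1,0), pad] mixes padding and data, so none of the tests
  // match and the load falls back to packetWithPossibleZero().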
 
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    const Index initialIndex = index;
    Index inputIndex = 0;
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // All the coefficients are in the left padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // All the coefficients are in the right padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // All the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // The packet straddles a padding boundary: fall back to scalar loads.
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index firstIdx = index;
    const Index lastIdx = index + PacketSize - 1;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // All the coefficients are in the left padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // All the coefficients are in the right padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // All the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case: fall back to scalar loads.
    return packetWithPossibleZero(initialIndex);
  }
 
  // ...
};

#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H