#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
namespace Eigen {

namespace internal {

template<typename Shuffle, typename XprType>
struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
{
  typedef traits<XprType> XprTraits;
  typedef typename XprType::Nested Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};
template<typename Shuffle, typename XprType>
struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
{
  typedef const TensorShufflingOp<Shuffle, XprType>& type;
};
template<typename Shuffle, typename XprType>
struct nested<TensorShufflingOp<Shuffle, XprType>, 1,
              typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
{
  typedef TensorShufflingOp<Shuffle, XprType> type;
};
}  // end namespace internal

template<typename Shuffle, typename XprType>
class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> >
{ /* accessors and assignment operators elided */ };
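// Illustrative usage (not part of this header): shuffle() permutes the
// dimensions of a tensor expression according to the given permutation.
//
//   Eigen::Tensor<float, 2> input(20, 30);
//   Eigen::array<int, 2> shuffles({1, 0});
//   Eigen::Tensor<float, 2> output = input.shuffle(shuffles);  // 30 x 20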
// Eval as rvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorShufflingOp<Shuffle, ArgType> XprType;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device), m_impl(op.expression(), device)
  {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Shuffle& shuffle = op.shufflePermutation();
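    // Record the shuffle and its inverse, compute the shuffled output
    // dimensions, and detect the identity permutation, which lets
    // coefficient access skip index remapping entirely.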
    m_is_identity = true;
    for (int i = 0; i < NumDims; ++i) {
      m_shuffle[i] = static_cast<int>(shuffle[i]);
      m_dimensions[i] = input_dims[shuffle[i]];
      m_inverseShuffle[shuffle[i]] = i;
      if (m_is_identity && shuffle[i] != i) {
        m_is_identity = false;
      }
    }
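    // Precompute strides: output strides follow the shuffled dimensions,
    // unshuffled input strides follow the original ones, and each output
    // stride gets a cached TensorIntDivisor to make srcCoeff() cheaper.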
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_unshuffledInputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    } else {
      m_unshuffledInputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    }
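    // Applying the shuffle to the unshuffled input strides yields, for each
    // output dimension, the step to take in the flat input buffer.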
    for (int i = 0; i < NumDims; ++i) {
      m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
    }
  }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    if (m_is_identity) return m_impl.coeff(index);
    return m_impl.coeff(srcCoeff(index));
  }
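  // Packet loads dispatch at compile time on whether the wrapped evaluator
  // itself has packet access; without it, coefficients are gathered one by
  // one into an aligned buffer and loaded as a packet.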
  template <int LoadMode, typename Self, bool ImplPacketAccess>
  struct PacketLoader {
    EIGEN_DEVICE_FUNC static PacketReturnType Run(const Self& self, Index index) {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) values[i] = self.coeff(index + i);
      return internal::pload<PacketReturnType>(values);
    }
  };
  template <int LoadMode, typename Self>
  struct PacketLoader<LoadMode, Self, true> {
    EIGEN_DEVICE_FUNC static PacketReturnType Run(const Self& self, Index index) {
      if (self.m_is_identity) {
        return self.m_impl.template packet<LoadMode>(index);
      }
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) values[i] = self.coeff(index + i);
      return internal::pload<PacketReturnType>(values);
    }
  };
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
  }
  EIGEN_DEVICE_FUNC internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const int inner_dim =
        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
    const size_t target_size = m_device.firstLevelCacheSize();
    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
    // A shuffled inner dimension produces random memory access that the
    // default cost model does not capture, so add that cost explicitly.
    using BlockRequirements = internal::TensorBlockResourceRequirements;
    if (inner_dim_shuffled) {
      return BlockRequirements::uniform<Scalar>(target_size)
          .addCostPerCoeff({0, 0, NumDims * 28});
    }
    return BlockRequirements::skewed<Scalar>(target_size);
  }
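  // Materialize an output block by copying from the input tensor with a
  // dimension map that applies the shuffle during the copy.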
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    assert(m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout> TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch, root_of_expr_ast);

    typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
    TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset()));
    TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
                         block_storage.data());

    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
    return block_storage.AsTensorMaterializedBlock();
  }
  EIGEN_DEVICE_FUNC TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
                                NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                           2 * TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>());
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
  }
#ifdef EIGEN_USE_SYCL
  // Bind placeholder accessors to a SYCL command group handler.
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const { m_impl.bind(cgh); }
#endif

 protected:
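  // Convert a flat index within an input block into the corresponding flat
  // index within the output block: decompose along the input block strides,
  // then recompose along the inverse-shuffled output block strides.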
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
      Index input_index,
      const DSizes<Index, NumDims>& input_block_strides,
      const DSizes<Index, NumDims>& output_block_strides,
      const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const {
    Index output_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index * output_block_strides[m_inverseShuffle[0]];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index * output_block_strides[m_inverseShuffle[NumDims - 1]];
    }
  }
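  // Map a coefficient index in the shuffled output to the matching index in
  // the unshuffled input: peel off one output dimension at a time with the
  // cached fast divisors, stepping by the corresponding input stride.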
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[0];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[NumDims - 1];
    }
  }
};
// Eval as lvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
    : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) {
    // Store the packet to a scalar buffer, then scatter through coeffRef().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    for (int i = 0; i < PacketSize; ++i) this->coeffRef(index + i) = values[i];
  }
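  // Scatter a materialized right-hand-side block back into the input tensor,
  // undoing the shuffle via the inverse dimension map.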
  template <typename TensorBlock>
  EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc& desc,
                                      const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout> TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const Scalar* block_buffer = block.data();
    void* mem = NULL;
    if (block_buffer == NULL) {
      // No buffer: evaluate the block expression into a temporary allocation.
      mem = this->m_device.allocate(desc.size() * sizeof(Scalar));
      ScalarNoConst* buf = static_cast<ScalarNoConst*>(mem);
      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumDims, typename TensorBlock::XprType, Index>
          TensorBlockAssignment;
      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              desc.dimensions(), internal::strides<Layout>(desc.dimensions()), buf),
          block.expr());
      block_buffer = buf;
    }
    // Read from the materialized block buffer...
    TensorBlockIOSrc src(internal::strides<Layout>(desc.dimensions()),
                         block_buffer);

    // ...and write into the input tensor, with block dimensions restored to
    // the input (unshuffled) order.
    typename TensorBlockIO::Dimensions output_strides(this->m_unshuffledInputStrides);
    typename TensorBlockIO::Dimensions output_dimensions;
    for (int i = 0; i < NumDims; ++i) {
      output_dimensions[this->m_shuffle[i]] = desc.dimension(i);
    }
    TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
                         this->srcCoeff(desc.offset()));

    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
    for (int i = 0; i < NumDims; ++i) {
      dst_to_src_dim_map[i] = static_cast<int>(this->m_inverseShuffle[i]);
    }
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    // Deallocate the temporary buffer, if one was needed.
    if (mem != NULL) this->m_device.deallocate(mem);
  }
};

}  // end namespace Eigen
#endif  // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H