#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H

namespace Eigen {
namespace internal {

template<typename Shuffle, typename XprType>
struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
{
  typedef traits<XprType> XprTraits;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  // ...
};

template<typename Shuffle, typename XprType>
struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
{
  typedef const TensorShufflingOp<Shuffle, XprType>& type;
};

template<typename Shuffle, typename XprType>
struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
{
  typedef TensorShufflingOp<Shuffle, XprType> type;
};

}  // end namespace internal

// Expression node that shuffles (permutes) the dimensions of its nested expression.
template<typename Shuffle, typename XprType>
class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> >
{
  public:
    // ...
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
        : m_xpr(expr), m_shuffle(shfl) {}

    EIGEN_DEVICE_FUNC const Shuffle& shufflePermutation() const { return m_shuffle; }

    EIGEN_DEVICE_FUNC const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Shuffle m_shuffle;
};
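// A typical usage sketch (illustrative only; the shuffle() method is provided
// by TensorBase and returns a TensorShufflingOp):
//
//   Eigen::Tensor<float, 3> input(20, 30, 50);
//   input.setRandom();
//   Eigen::array<int, 3> shuffling{{1, 2, 0}};
//   Eigen::Tensor<float, 3> output = input.shuffle(shuffling);
//   // output has dimensions (30, 50, 20), and
//   // output(j, k, i) == input(i, j, k) for all valid i, j, k.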

// Eval as rvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Self;
  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  enum {
    // ...
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    // ...
  };

  // Block-based evaluation machinery (see TensorBlock.h).
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device),
        m_impl(op.expression(), device)
  {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Shuffle& shuffle = op.shufflePermutation();

    // Record the permutation, the shuffled output dimensions and the inverse
    // permutation, and detect whether the shuffle is an identity.
    m_is_identity = true;
    for (int i = 0; i < NumDims; ++i) {
      m_shuffle[i] = static_cast<int>(shuffle[i]);
      m_dimensions[i] = input_dims[shuffle[i]];
      m_inverseShuffle[shuffle[i]] = i;
      if (m_is_identity && shuffle[i] != i) {
        m_is_identity = false;
      }
    }

    // Strides of the input (in its original order) and of the shuffled output.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_unshuffledInputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    } else {
      m_unshuffledInputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    }

    // Input strides reordered to match the output dimension order.
    for (int i = 0; i < NumDims; ++i) {
      m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
    }
  }
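
  // The stride arrays built by the constructor relate as follows:
  // m_unshuffledInputStrides are the strides of the input expression in its
  // original dimension order, m_outputStrides are the strides of the shuffled
  // output, and m_inputStrides[i] is the input stride of the dimension that
  // the shuffle maps to output dimension i. For example, for a ColMajor 3x4
  // input with shuffle = {1, 0}: m_unshuffledInputStrides = {1, 3},
  // m_dimensions = {4, 3}, m_outputStrides = {1, 4}, m_inputStrides = {3, 1}.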

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  // Identity shuffles read straight through; otherwise remap via srcCoeff().
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    }
    return m_impl.coeff(srcCoeff(index));
  }

  template <int LoadMode, typename Self, bool ImplPacketAccess>
  struct PacketLoader {
    EIGEN_DEVICE_FUNC
    static PacketReturnType Run(const Self& self, Index index) {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = self.coeff(index + i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };

  // Specialization used when the wrapped evaluator itself supports packet
  // access: an identity shuffle can forward packet loads directly.
  template <int LoadMode, typename Self>
  struct PacketLoader<LoadMode, Self, true> {
    EIGEN_DEVICE_FUNC
    static PacketReturnType Run(const Self& self, Index index) {
      if (self.m_is_identity) {
        return self.m_impl.template packet<LoadMode>(index);
      } else {
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = self.coeff(index + i);
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  };

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const int inner_dim =
        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;

    const size_t target_size = m_device.firstLevelCacheSize();
    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;

    // A shuffled inner dimension produces effectively random memory accesses,
    // which the default bytes-loaded/stored cost model does not capture, so an
    // extra per-coefficient cost is charged in that case.
    using BlockRequirements = internal::TensorBlockResourceRequirements;
    if (inner_dim_shuffled) {
      return BlockRequirements::uniform<Scalar>(target_size)
          .addCostPerCoeff({0, 0, NumDims * 28});
    } else {
      return BlockRequirements::skewed<Scalar>(target_size);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    assert(m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout> TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch, root_of_expr_ast);

    typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
    TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset()));

    TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
                         block_storage.data());

    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
                                NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                           2 * TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>());
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  // Maps a linear index in the (unshuffled) input block onto the corresponding
  // linear index in the shuffled output block.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
      Index input_index,
      const DSizes<Index, NumDims>& input_block_strides,
      const DSizes<Index, NumDims>& output_block_strides,
      const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const {
    Index output_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index * output_block_strides[m_inverseShuffle[0]];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index * output_block_strides[m_inverseShuffle[NumDims - 1]];
    }
  }

  // Maps a linear index into the shuffled output onto the corresponding linear
  // index into the input expression, using the precomputed stride arrays.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[0];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[NumDims - 1];
    }
  }

  Dimensions m_dimensions;
  bool m_is_identity;
  array<int, NumDims> m_shuffle;
  array<Index, NumDims> m_inverseShuffle;
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  array<Index, NumDims> m_unshuffledInputStrides;

  const Device EIGEN_DEVICE_REF m_device;
  TensorEvaluator<ArgType, Device> m_impl;
};
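
// Illustrative example of the index remapping performed by srcCoeff() above:
// for a ColMajor 3x4 input shuffled with {1, 0}, the output has dimensions
// 4x3, m_outputStrides = {1, 4} and m_inputStrides = {3, 1}. Output linear
// index 6 decomposes into output coordinates (2, 1), which correspond to
// input coordinates (1, 2), i.e. input linear index 1 + 2 * 3 = 7 -- exactly
// what srcCoeff(6) returns.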

// Eval as lvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
    : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;
  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  // ...

  enum {
    // ...
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    // ...
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    // Scatter the packet one coefficient at a time through coeffRef().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index + i) = values[i];
    }
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout> TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const Scalar* block_buffer = block.data();

    // If the block does not expose a raw buffer, materialize it into a
    // temporary buffer allocated on the device.
    void* mem = NULL;
    if (block_buffer == NULL) {
      mem = this->m_device.allocate(desc.size() * sizeof(Scalar));
      ScalarNoConst* buf = static_cast<ScalarNoConst*>(mem);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumDims, typename TensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              desc.dimensions(), internal::strides<Layout>(desc.dimensions()), buf),
          block.expr());

      block_buffer = buf;
    }

    // Read from the materialized block.
    TensorBlockIOSrc src(internal::strides<Layout>(desc.dimensions()), block_buffer);

    // Write to the underlying input tensor, with dimensions and strides put
    // back into the unshuffled (input) order.
    typename TensorBlockIO::Dimensions output_strides(this->m_unshuffledInputStrides);
    typename TensorBlockIO::Dimensions output_dimensions;
    for (int i = 0; i < NumDims; ++i) {
      output_dimensions[this->m_shuffle[i]] = desc.dimension(i);
    }
    TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
                         this->srcCoeff(desc.offset()));

    // Reorder dimensions according to the inverse shuffle.
    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
    for (int i = 0; i < NumDims; ++i) {
      dst_to_src_dim_map[i] = static_cast<int>(this->m_inverseShuffle[i]);
    }
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    // Deallocate the temporary buffer used for the block materialization.
    if (mem != NULL) this->m_device.deallocate(mem);
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H