Go to the documentation of this file.
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
23 template<
typename Shuffle,
typename XprType>
30 typedef typename XprType::Nested
Nested;
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
37 template<
typename Shuffle,
typename XprType>
43 template<
typename Shuffle,
typename XprType>
53 template<
typename Shuffle,
typename XprType>
85 template<
typename Shuffle,
typename ArgType,
typename Device>
123 m_impl(op.expression(), device)
127 m_is_identity =
true;
128 for (
int i = 0;
i < NumDims; ++
i) {
129 m_shuffle[
i] =
static_cast<int>(shuffle[
i]);
130 m_dimensions[
i] = input_dims[shuffle[
i]];
131 m_inverseShuffle[shuffle[
i]] =
i;
132 if (m_is_identity && shuffle[
i] !=
i) {
133 m_is_identity =
false;
138 m_unshuffledInputStrides[0] = 1;
139 m_outputStrides[0] = 1;
141 for (
int i = 1;
i < NumDims; ++
i) {
142 m_unshuffledInputStrides[
i] =
143 m_unshuffledInputStrides[
i - 1] * input_dims[
i - 1];
144 m_outputStrides[
i] = m_outputStrides[
i - 1] * m_dimensions[
i - 1];
146 m_outputStrides[
i] > 0 ? m_outputStrides[
i] :
Index(1));
149 m_unshuffledInputStrides[NumDims - 1] = 1;
150 m_outputStrides[NumDims - 1] = 1;
151 for (
int i = NumDims - 2;
i >= 0; --
i) {
152 m_unshuffledInputStrides[
i] =
153 m_unshuffledInputStrides[
i + 1] * input_dims[
i + 1];
154 m_outputStrides[
i] = m_outputStrides[
i + 1] * m_dimensions[
i + 1];
156 m_outputStrides[
i] > 0 ? m_outputStrides[
i] :
Index(1));
160 for (
int i = 0;
i < NumDims; ++
i) {
161 m_inputStrides[
i] = m_unshuffledInputStrides[shuffle[
i]];
168 m_impl.evalSubExprsIfNeeded(
NULL);
172 #ifdef EIGEN_USE_THREADS
173 template <
typename EvalSubExprsCallback>
176 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
178 #endif // EIGEN_USE_THREADS
187 return m_impl.coeff(index);
189 return m_impl.coeff(srcCoeff(index));
193 template <
int LoadMode,
typename Self,
bool ImplPacketAccess>
194 struct PacketLoader {
207 template<
int LoadMode,
typename Self>
208 struct PacketLoader<LoadMode,
Self, true> {
211 if (
self.m_is_identity) {
212 return self.m_impl.template packet<LoadMode>(index);
225 template<
int LoadMode>
230 return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*
this, index);
235 static const int inner_dim =
238 const size_t target_size =
m_device.firstLevelCacheSize();
239 const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
247 if (inner_dim_shuffled) {
248 return BlockRequirements::uniform<Scalar>(target_size)
251 return BlockRequirements::skewed<Scalar>(target_size);
257 bool root_of_expr_ast =
false)
const {
258 assert(m_impl.data() !=
NULL);
262 typedef typename TensorBlockIO::Dst TensorBlockIODst;
263 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
267 desc, scratch, root_of_expr_ast);
269 typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
270 TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.
offset()));
273 block_storage.
data());
275 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
276 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
282 const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
283 NumDims * (2 * TensorOpCost::AddCost<Index>() +
284 2 * TensorOpCost::MulCost<Index>() +
285 TensorOpCost::DivCost<Index>());
286 return m_impl.costPerCoeff(vectorized) +
292 #ifdef EIGEN_USE_SYCL
304 Index output_index = 0;
306 for (
int i = NumDims - 1;
i > 0; --
i) {
307 const Index idx = input_index / fast_input_block_strides[
i];
308 output_index += idx * output_block_strides[m_inverseShuffle[
i]];
309 input_index -= idx * input_block_strides[
i];
311 return output_index + input_index *
312 output_block_strides[m_inverseShuffle[0]];
314 for (
int i = 0;
i < NumDims - 1; ++
i) {
315 const Index idx = input_index / fast_input_block_strides[
i];
316 output_index += idx * output_block_strides[m_inverseShuffle[
i]];
317 input_index -= idx * input_block_strides[
i];
319 return output_index + input_index *
320 output_block_strides[m_inverseShuffle[NumDims - 1]];
325 Index inputIndex = 0;
327 for (
int i = NumDims - 1;
i > 0; --
i) {
328 const Index idx = index / m_fastOutputStrides[
i];
329 inputIndex += idx * m_inputStrides[
i];
330 index -= idx * m_outputStrides[
i];
332 return inputIndex + index * m_inputStrides[0];
334 for (
int i = 0;
i < NumDims - 1; ++
i) {
335 const Index idx = index / m_fastOutputStrides[
i];
336 inputIndex += idx * m_inputStrides[
i];
337 index -= idx * m_outputStrides[
i];
339 return inputIndex + index * m_inputStrides[NumDims - 1];
358 template<
typename Shuffle,
typename ArgType,
typename Device>
360 :
public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
394 return this->m_impl.coeffRef(this->srcCoeff(index));
403 internal::pstore<CoeffReturnType, PacketReturnType>(
values,
x);
410 template <
typename TensorBlock>
417 typedef typename TensorBlockIO::Dst TensorBlockIODst;
418 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
425 if (block_buffer ==
NULL) {
431 TensorBlockAssignment;
433 TensorBlockAssignment::Run(
434 TensorBlockAssignment::target(
443 TensorBlockIOSrc src(internal::strides<Layout>(desc.
dimensions()),
447 typename TensorBlockIO::Dimensions output_strides(
448 this->m_unshuffledInputStrides);
449 typename TensorBlockIO::Dimensions output_dimensions;
450 for (
int i = 0;
i < NumDims; ++
i) {
451 output_dimensions[this->m_shuffle[
i]] = desc.
dimension(
i);
453 TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
457 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
458 for (
int i = 0;
i < NumDims; ++
i) {
459 dst_to_src_dim_map[
i] =
static_cast<int>(this->m_inverseShuffle[
i]);
461 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
471 #endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE Dimensions & dimensions() const
EIGEN_DEVICE_FUNC Storage::Type data() const
TensorShufflingOp< Shuffle, XprType > type
internal::TensorMaterializedBlock< ScalarNoConst, NumDims, Layout, Index > TensorBlock
TensorEvaluator< const TensorShufflingOp< Shuffle, ArgType >, Device > Self
#define EIGEN_DEVICE_FUNC
Namespace containing all symbols from the Eigen library.
array< internal::TensorIntDivisor< Index >, NumDims > m_fastOutputStrides
DSizes< Index, NumDims > Dimensions
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Eigen::internal::traits< TensorShufflingOp >::Index Index
Storage::Type EvaluatorPointerType
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketReturnType Run(const Self &self, Index index)
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc &desc, const TensorBlock &block)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
XprType::CoeffReturnType CoeffReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index)
Eigen::internal::nested< TensorShufflingOp >::type Nested
Eigen::NumTraits< Scalar >::Real RealScalar
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
traits< XprType > XprTraits
array< Index, NumDims > m_inputStrides
const Dimensions & dimensions() const
static const int PacketSize
XprTraits::StorageKind StorageKind
EIGEN_STRONG_INLINE void cleanup()
internal::remove_const< Scalar >::type ScalarNoConst
TensorShufflingOp< Shuffle, ArgType > XprType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
const EIGEN_DEVICE_FUNC Shuffle & shufflePermutation() const
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE Dimensions & dimensions() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType &expr, const Shuffle &shfl)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
internal::remove_const< Scalar >::type ScalarNoConst
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
#define EIGEN_STRONG_INLINE
#define EIGEN_UNROLL_LOOP
Eigen::internal::traits< TensorShufflingOp >::StorageKind StorageKind
internal::remove_const< Scalar >::type ScalarNoConst
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketReturnType Run(const Self &self, Index index)
array< Index, NumDims > m_unshuffledInputStrides
const EIGEN_DEVICE_FUNC internal::remove_all< typename XprType::Nested >::type & expression() const
DSizes< Index, NumDims > Dimensions
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
const Device EIGEN_DEVICE_REF m_device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(Index input_index, const DSizes< Index, NumDims > &input_block_strides, const DSizes< Index, NumDims > &output_block_strides, const DSizes< internal::TensorIntDivisor< Index >, NumDims > &fast_input_block_strides) const
EIGEN_DEVICE_FUNC TensorBlockResourceRequirements & addCostPerCoeff(TensorOpCost cost)
const typedef TensorShufflingOp< Shuffle, XprType > & type
remove_reference< Nested >::type _Nested
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index)
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
TensorMap< const Tensor< Scalar, NumDims, Layout > > XprType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
array< Index, NumDims > m_inverseShuffle
#define EIGEN_STATIC_ASSERT(CONDITION, MSG)
XprType::CoeffReturnType CoeffReturnType
array< Index, NumDims > m_outputStrides
XprType::CoeffReturnType CoeffReturnType
XprTraits::PointerType PointerType
TensorBase< TensorShufflingOp< Shuffle, XprType > > Base
Storage::Type EvaluatorPointerType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
const Scalar * data() const
PacketType< CoeffReturnType, Device >::type PacketReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
TensorEvaluator< ArgType, Device > m_impl
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
A cost model used to limit the number of threads used for evaluating tensor expression.
const Device EIGEN_DEVICE_REF m_device
const Dimensions & dimensions() const
const XprType & expr() const
StorageMemory< CoeffReturnType, Device > Storage
static EIGEN_STRONG_INLINE Storage prepareStorage(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool allow_strided_storage=false)
IndexType dimension(int index) const
TensorEvaluator< const TensorShufflingOp< Shuffle, ArgType >, Device > Base
array< int, NumDims > m_shuffle
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
const Dimensions & strides() const
PacketType< CoeffReturnType, Device >::type PacketReturnType
TensorMaterializedBlock AsTensorMaterializedBlock() const
TensorShufflingOp< Shuffle, ArgType > XprType
Eigen::internal::traits< TensorShufflingOp >::Scalar Scalar
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool root_of_expr_ast=false) const
gtsam
Author(s):
autogenerated on Sat Jun 1 2024 03:06:02