#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
namespace Eigen {

/** \class TensorChipping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor chipping class.
  */
namespace internal {

template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef traits<XprType> XprTraits;
  typedef typename XprType::Nested Nested;
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
  // ... (Scalar, StorageKind, Index and PointerType typedefs elided)
};
template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};
template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1,
              typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};
// Carries the chipped dimension: a compile-time constant here, a run-time
// value (`const DenseIndex actual_dim`) in the Dynamic specialization.
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    EIGEN_UNUSED_VARIABLE(dim);
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { return DimId; }
};

}  // end namespace internal
template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
    typedef TensorBase<TensorChippingOp<DimId, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;
    // ... (remaining typedefs elided)

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
        : m_xpr(expr), m_offset(offset), m_dim(dim) {}

    EIGEN_DEVICE_FUNC const Index offset() const { return m_offset; }
    EIGEN_DEVICE_FUNC const Index dim() const { return m_dim.actualDim(); }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};
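// ---------------------------------------------------------------------------
// Editor's note: an illustrative usage sketch, not part of the original
// header. chip() on TensorBase is the public entry point that builds a
// TensorChippingOp; fixing one index yields an expression of rank N-1.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 3> input(2, 3, 5);
//   input.setRandom();
//   // Fix dimension 1 at offset 2: the result is a 2x5 expression.
//   Eigen::Tensor<float, 2> slice = input.chip<1>(2);
//   // Equivalent run-time-dimension form (uses the Dynamic DimensionId path).
//   Eigen::Tensor<float, 2> same = input.chip(2, 1);
// ---------------------------------------------------------------------------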
// Evaluator for the read-only chipping expression ("eval as rvalue").
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  // ... (scalar and block-access typedefs elided)

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned = false,
    Layout    = TensorEvaluator<ArgType, Device>::Layout,
    // Chipping the outer-most dimension is a trivial operation: reads and
    // writes go straight through to the underlying tensor at a constant
    // offset.
    IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == 0),
    // Chipping the inner-most dimension replaces unit-stride access with
    // strided access.
    IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
    // ... (remaining flags elided)
  };
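  // Editor's note: a concrete illustration of the two fast-path
  // classifications above (assumption: the default ColMajor layout):
  //
  //   Eigen::Tensor<float, 3> t(2, 3, 5);
  //   auto inner = t.chip<0>(1);  // inner chipping: removes the stride-1
  //                               // dim, so the result is strided in memory
  //   auto outer = t.chip<2>(4);  // outer chipping: a contiguous 2x3 block
  //                               // at the constant offset 4 * (2*3) = 24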
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    // The output dimensions are the input dimensions minus the chipped one.
    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }
    // Precompute the output stride of the chipped dimension (m_stride), the
    // matching input stride (m_inputStride), and the constant offset
    // contributed by the chip offset (m_inputOffset).
    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims - 1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }
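  // Editor's note: a worked example of the stride setup above (assumption:
  // ColMajor layout, input dims (2, 3, 5), chipping dim 1 at offset k):
  //   m_stride      = 2           (product of dims before the chipped one)
  //   m_inputStride = 2 * 3 = 6   (m_stride times the chipped extent)
  //   m_inputOffset = 2 * k
  // Output index i then maps to input index (i / 2) * 6 + 2*k + (i % 2),
  // which is exactly what srcCoeff() computes further down.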
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division and gather the
      // strided input coefficients one by one.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      return internal::pload<PacketReturnType>(values);
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so the load is contiguous.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      }
      // The packet crosses a stride boundary: fall back to coefficient loads.
      // ... (scalar fallback elided)
    }
  }
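  // Editor's note: with the (2, 3, 5) example above, m_stride == 2, so for
  // PacketSize == 4 the condition rem + PacketSize <= m_stride never holds
  // and every packet load takes the scalar-gather fallback. Chipping a
  // dimension with a small stride therefore defeats vectorization in the
  // general path.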
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      // Inner chipping: one multiply and one add per coefficient.
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      // Outer chipping: a single add.
      cost += TensorOpCost::AddCost<Index>();
    } else {
      // General case: srcCoeff() needs a division plus several muls and adds.
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    const Index chip_dim = m_dim.actualDim();

    // Describe the argument block: re-insert the chipped dimension with
    // extent 1.
    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i]
            = i < chip_dim ? desc.dimension(i)
            : i > chip_dim ? desc.dimension(i - 1)
            : 1;
    }

    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);

    // If the descriptor carries a destination buffer, forward it to the
    // argument block with the chipped dimension's stride re-inserted.
    if (desc.HasDestinationBuffer()) {
      DSizes<Index, NumInputDims> arg_destination_strides;
      for (int i = 0; i < NumInputDims; ++i) {
        arg_destination_strides[i]
              = i < chip_dim ? desc.destination().strides()[i]
              : i > chip_dim ? desc.destination().strides()[i - 1]
              : 0;  // the stride of a size-1 dimension does not matter
      }
      arg_desc.template AddDestinationBuffer<Layout>(
          desc.destination().template data<ScalarNoConst>(),
          arg_destination_strides);
    }

    ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
    // ... (when the argument block already owns a suitable buffer it is
    // forwarded directly; otherwise the expression is materialized below)

    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    typedef internal::TensorBlockAssignment<
        ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
        TensorBlockAssignment;

    TensorBlockAssignment::Run(
        TensorBlockAssignment::target(
            arg_desc.dimensions(),
            internal::strides<Layout>(arg_desc.dimensions()),
            block_storage.data()),
        arg_block.expr());

    return block_storage.AsTensorMaterializedBlock();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (isOuterChipping() && result) {
      // An outer chip of a materialized tensor is itself a contiguous buffer.
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }
#ifdef EIGEN_USE_SYCL
  // ... (SYCL accessor binding elided)
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if (isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so the mapping is a plain shift.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }
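  // Editor's note: continuing the (2, 3, 5) ColMajor example (chip dim 1,
  // offset k), output index 3, i.e. coordinate (1, 1) of the 2x5 result:
  //   idx = 3 / 2 = 1,  inputIndex = 1*6 + 2*k + 1 = 2*k + 7,
  // which is the ColMajor linear index of input coordinate (1, k, 1):
  //   1 + 2*k + 6*1 = 2*k + 7.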
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
    return IsInnerChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
    return IsOuterChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims - 1) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device EIGEN_DEVICE_REF m_device;
};
// Evaluator for the writable chipping expression ("eval as lvalue"): inherits
// the read path and adds coeffRef/writePacket/writeBlock.
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
    : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims - 1;
  // ... (remaining typedefs elided)

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
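  // Editor's note: an illustrative write-through-a-chip sketch, not part of
  // the original header. Chipping an lvalue yields an assignable slice:
  //
  //   Eigen::Tensor<float, 3> t(2, 3, 5);
  //   Eigen::Tensor<float, 2> plane(2, 5);
  //   t.setZero();
  //   plane.setConstant(1.0f);
  //   t.chip<1>(0) = plane;  // assignment routes through coeffRef/writePacket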
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    if (this->isInnerChipping()) {
      // m_stride is equal to 1, so scatter the packet coefficient by
      // coefficient into the strided destination.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if (this->isOuterChipping()) {
      // m_stride is always greater than index, so the store is contiguous.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // The packet crosses a stride boundary: fall back to scalar stores.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    const Index chip_dim = this->m_dim.actualDim();

    // Re-insert the chipped dimension (with extent 1) into the block shape.
    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
                              const typename TensorBlock::XprType>
        TensorBlockExpr;

    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
                                            TensorBlockExpr, Index>
        TensorBlockAssign;

    // Assign the reshaped block expression directly into the underlying
    // tensor's buffer at the chip offset.
    TensorBlockAssign::Run(
        TensorBlockAssign::target(
            input_block_dims,
            internal::strides<Layout>(this->m_impl.dimensions()),
            this->m_impl.data(), this->srcCoeff(desc.offset())),
        block.expr().reshape(input_block_dims));
  }
};
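// Editor's note: writeBlock() above serves the block-based assignment path,
// which applies when a chipped expression appears on the left-hand side and
// the underlying tensor is materialized in memory (hence the
// data() != NULL assert); otherwise writes fall back to coeffRef/writePacket.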
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H