#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {
namespace internal {

template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef traits<XprType> XprTraits;
  typedef typename XprType::Nested Nested;
  // Chipping removes exactly one dimension from the input expression.
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
};
template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};

template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1,
              typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

// Holds the chipped dimension. When DimId is a compile-time constant no
// storage is needed; a DimensionId<Dynamic> specialization (not shown here)
// keeps the dimension in a runtime `const DenseIndex actual_dim` member.
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    EIGEN_UNUSED_VARIABLE(dim);
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { return DimId; }
};

}  // end namespace internal
template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
    typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
        : m_xpr(expr), m_offset(offset), m_dim(dim) {}

    EIGEN_DEVICE_FUNC
    const Index offset() const { return m_offset; }
    EIGEN_DEVICE_FUNC
    const Index dim() const { return m_dim.actualDim(); }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};
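// Usage sketch (illustrative; the tensors below are example code, not part of
// this header). Chipping fixes one coefficient index along one dimension and
// returns an expression of rank NumDims - 1:
//
//   Eigen::Tensor<float, 3> input(4, 5, 6);
//   input.setRandom();
//   // Fix index 2 along dimension 1; the result has shape (4, 6).
//   Eigen::Tensor<float, 2> slice = input.chip(2, 1);
//   // Equivalent form with the dimension as a compile-time template argument:
//   Eigen::Tensor<float, 2> slice2 = input.chip<1>(2);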
// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  enum {
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    // Chipping the outer-most dimension is trivial: the chip is a contiguous
    // sub-range of the input, addressable with a single offset.
    IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == 0),
    // Chipping the inner-most dimension yields a strided view of the input.
    IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1)
  };

  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef internal::TensorBlockDescriptor<NumInputDims, Index> ArgTensorBlockDesc;
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;
  typedef internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;
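  // Example (illustrative): for a ColMajor tensor of shape (4, 5, 6), chipping
  // dimension 2 (outer-most) yields a contiguous 4x5 slab at a single offset,
  // while chipping dimension 0 (inner-most) yields a strided 5x6 view whose
  // consecutive coefficients are 4 scalars apart in memory.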
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    // The output dimensions are the input dimensions with the chipped
    // dimension removed.
    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    // m_stride is the stride of the chipped dimension in the output;
    // m_inputStride additionally spans the extent of the chipped dimension.
    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims - 1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
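  // Worked example (illustrative): for a ColMajor input of shape (4, 5, 6)
  // chipped at dim 1 with offset 2, the code above gives m_stride = 4 (product
  // of the dimensions before the chipped one), m_inputStride = 4 * 5 = 20
  // (additionally spanning the chipped extent), and m_inputOffset = 4 * 2 = 8.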
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if (isOuterChipping()) {
      // The chip is a contiguous block of the input: load directly.
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        // The whole packet lies within one slice of the chipped dimension.
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // The packet crosses a stride boundary: fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }
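  // Example (illustrative): with m_stride = 4 and PacketSize = 4, a packet at
  // index 0 (rem == 0) lies within a single slice and takes the fast path,
  // while a packet at index 2 (rem == 2, so rem + PacketSize > m_stride) would
  // straddle two slices and falls back to the coefficient-wise loop above.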
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      cost += TensorOpCost::AddCost<Index>();
    } else {
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }
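  // Note: the three branches mirror srcCoeff() below: inner chipping costs one
  // multiply and one add per coefficient, outer chipping a single add, and the
  // general case an integer division plus the surrounding multiply/add math.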
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    const Index chip_dim = m_dim.actualDim();

    // Map output block dimensions to input block dimensions by re-inserting
    // the chipped dimension with extent 1.
    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);

    // Try to reuse the destination buffer for materializing the argument block.
    if (desc.HasDestinationBuffer()) {
      DSizes<Index, NumInputDims> arg_destination_strides;
      for (int i = 0; i < NumInputDims; ++i) {
        arg_destination_strides[i]
            = i < chip_dim ? desc.destination().strides()[i]
            : i > chip_dim ? desc.destination().strides()[i - 1]
            : 0;  // for dimensions of size 1 the stride does not matter
      }
      arg_desc.template AddDestinationBuffer<Layout>(
          desc.destination().template data<ScalarNoConst>(),
          arg_destination_strides);
    }

    ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();

    if (arg_block.data() != NULL) {
      // Forward the argument block buffer if possible.
      return TensorBlock(arg_block.kind(), arg_block.data(), desc.dimensions());
    }

    // Otherwise materialize the argument block expression into scratch storage.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    typedef internal::TensorBlockAssignment<
        ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
        TensorBlockAssignment;

    TensorBlockAssignment::Run(
        TensorBlockAssignment::target(
            arg_desc.dimensions(),
            internal::strides<Layout>(arg_desc.dimensions()),
            block_storage.data()),
        arg_block.expr());

    return block_storage.AsTensorMaterializedBlock();
  }
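  // Example (illustrative): when chipping dimension 1 of a rank-3 input, an
  // output block of shape (2, 3) maps to an input block of shape (2, 1, 3),
  // with the chipped dimension re-inserted with extent 1.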
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (isOuterChipping() && result) {
      // An outer chip of a materialized tensor is itself a plain buffer.
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }
#ifdef EIGEN_USE_SYCL
  // binds the buffers of the underlying evaluator to the SYCL command group
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if (isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so avoid the integer division.
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }
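  // Worked example (illustrative), continuing the ColMajor (4, 5, 6) input
  // chipped at dim 1 with offset 2 (m_stride = 4, m_inputStride = 20,
  // m_inputOffset = 8): output index 13 gives idx = 13 / 4 = 3, so
  // inputIndex = 3 * 20 + 8 + (13 - 3 * 4) = 69, i.e. input coordinates
  // (1, 2, 3) for output coordinates (1, 3).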
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
    return IsInnerChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
    return IsOuterChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims - 1) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device EIGEN_DEVICE_REF m_device;
};
// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
    : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum { Layout = TensorEvaluator<ArgType, Device>::Layout };

  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x)
  {
    if (this->isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if (this->isOuterChipping()) {
      // The chip is a contiguous block of the input: store directly.
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // The packet crosses a stride boundary: fall back to the slow path.
        EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
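  // Write-side usage sketch (illustrative; example code, not part of this
  // header). A chip is a valid lvalue, so this evaluator backs expressions
  // such as:
  //
  //   Eigen::Tensor<float, 3> t(4, 5, 6);
  //   Eigen::Tensor<float, 2> plane(4, 6);
  //   plane.setConstant(1.f);
  //   t.chip(0, 1) = plane;  // overwrite the slice at index 0 along dim 1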
  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    const Index chip_dim = this->m_dim.actualDim();

    // Map output block dimensions to input block dimensions by re-inserting
    // the chipped dimension with extent 1.
    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
                              const typename TensorBlock::XprType>
        TensorBlockExpr;

    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
                                            TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(
            input_block_dims,
            internal::strides<Layout>(this->m_impl.dimensions()),
            this->m_impl.data(), this->srcCoeff(desc.offset())),
        block.expr().reshape(input_block_dims));
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H