10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H 23 template<
typename Str
ides,
typename XprType>
30 typedef typename XprType::Nested
Nested;
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
37 template<
typename Str
ides,
typename XprType>
43 template<
typename Str
ides,
typename XprType>
53 template<
typename Str
ides,
typename XprType>
66 : m_xpr(expr), m_dims(dims) {}
69 const Strides&
strides()
const {
return m_dims; }
84 template<
typename Str
ides,
typename ArgType,
typename Device>
113 : m_impl(op.expression(), device)
115 m_dimensions = m_impl.dimensions();
116 for (
int i = 0;
i < NumDims; ++
i) {
121 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
122 m_outputStrides[0] = 1;
123 m_inputStrides[0] = 1;
124 for (
int i = 1;
i < NumDims; ++
i) {
125 m_outputStrides[
i] = m_outputStrides[
i-1] * m_dimensions[
i-1];
126 m_inputStrides[
i] = m_inputStrides[
i-1] * input_dims[
i-1];
127 m_inputStrides[
i-1] *= op.
strides()[
i-1];
129 m_inputStrides[NumDims-1] *= op.
strides()[NumDims-1];
131 m_outputStrides[NumDims-1] = 1;
132 m_inputStrides[NumDims-1] = 1;
133 for (
int i = NumDims - 2;
i >= 0; --
i) {
134 m_outputStrides[
i] = m_outputStrides[
i+1] * m_dimensions[
i+1];
135 m_inputStrides[
i] = m_inputStrides[
i+1] * input_dims[
i+1];
136 m_inputStrides[
i+1] *= op.
strides()[
i+1];
138 m_inputStrides[0] *= op.
strides()[0];
146 m_impl.evalSubExprsIfNeeded(
NULL);
155 return m_impl.coeff(srcCoeff(index));
158 template<
int LoadMode>
164 Index inputIndices[] = {0, 0};
165 Index indices[] = {index, index + PacketSize - 1};
166 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
168 for (
int i = NumDims - 1;
i > 0; --
i) {
169 const Index idx0 = indices[0] / m_outputStrides[
i];
170 const Index idx1 = indices[1] / m_outputStrides[
i];
171 inputIndices[0] += idx0 * m_inputStrides[
i];
172 inputIndices[1] += idx1 * m_inputStrides[
i];
173 indices[0] -= idx0 * m_outputStrides[
i];
174 indices[1] -= idx1 * m_outputStrides[
i];
176 inputIndices[0] += indices[0] * m_inputStrides[0];
177 inputIndices[1] += indices[1] * m_inputStrides[0];
180 for (
int i = 0;
i < NumDims - 1; ++
i) {
181 const Index idx0 = indices[0] / m_outputStrides[
i];
182 const Index idx1 = indices[1] / m_outputStrides[
i];
183 inputIndices[0] += idx0 * m_inputStrides[
i];
184 inputIndices[1] += idx1 * m_inputStrides[
i];
185 indices[0] -= idx0 * m_outputStrides[
i];
186 indices[1] -= idx1 * m_outputStrides[
i];
188 inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
189 inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
191 if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
192 PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
197 values[0] = m_impl.coeff(inputIndices[0]);
198 values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
200 for (
int i = 1;
i < PacketSize-1; ++
i) {
201 values[
i] = coeff(index+
i);
203 PacketReturnType rslt = internal::pload<PacketReturnType>(
values);
209 double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
210 TensorOpCost::MulCost<Index>() +
211 TensorOpCost::DivCost<Index>()) +
212 TensorOpCost::MulCost<Index>();
216 const int innerDim = (
static_cast<int>(Layout) == static_cast<int>(
ColMajor)) ? 0 : (NumDims - 1);
217 return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
219 TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
224 #ifdef EIGEN_USE_SYCL 233 Index inputIndex = 0;
234 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
236 for (
int i = NumDims - 1;
i > 0; --
i) {
237 const Index idx = index / m_outputStrides[
i];
238 inputIndex += idx * m_inputStrides[
i];
239 index -= idx * m_outputStrides[
i];
241 inputIndex += index * m_inputStrides[0];
244 for (
int i = 0;
i < NumDims - 1; ++
i) {
245 const Index idx = index / m_outputStrides[
i];
246 inputIndex += idx * m_inputStrides[
i];
247 index -= idx * m_outputStrides[
i];
249 inputIndex += index * m_inputStrides[NumDims-1];
261 template<
typename Str
ides,
typename ArgType,
typename Device>
263 :
public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
274 PreferBlockAccess =
false,
281 : Base(op, device) { }
291 return this->m_impl.coeffRef(this->srcCoeff(index));
300 Index inputIndices[] = {0, 0};
301 Index indices[] = {index, index + PacketSize - 1};
302 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
304 for (
int i = NumDims - 1;
i > 0; --
i) {
305 const Index idx0 = indices[0] / this->m_outputStrides[
i];
306 const Index idx1 = indices[1] / this->m_outputStrides[
i];
307 inputIndices[0] += idx0 * this->m_inputStrides[
i];
308 inputIndices[1] += idx1 * this->m_inputStrides[
i];
309 indices[0] -= idx0 * this->m_outputStrides[
i];
310 indices[1] -= idx1 * this->m_outputStrides[
i];
312 inputIndices[0] += indices[0] * this->m_inputStrides[0];
313 inputIndices[1] += indices[1] * this->m_inputStrides[0];
316 for (
int i = 0;
i < NumDims - 1; ++
i) {
317 const Index idx0 = indices[0] / this->m_outputStrides[
i];
318 const Index idx1 = indices[1] / this->m_outputStrides[
i];
319 inputIndices[0] += idx0 * this->m_inputStrides[
i];
320 inputIndices[1] += idx1 * this->m_inputStrides[
i];
321 indices[0] -= idx0 * this->m_outputStrides[
i];
322 indices[1] -= idx1 * this->m_outputStrides[
i];
324 inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
325 inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
327 if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
328 this->m_impl.template writePacket<Unaligned>(inputIndices[0],
x);
332 internal::pstore<Scalar, PacketReturnType>(
values,
x);
333 this->m_impl.coeffRef(inputIndices[0]) = values[0];
334 this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
336 for (
int i = 1;
i < PacketSize-1; ++
i) {
337 this->coeffRef(index+
i) = values[
i];
#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H