10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
23 template<
typename Str
ides,
typename XprType>
30 typedef typename XprType::Nested
Nested;
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
37 template<
typename Str
ides,
typename XprType>
43 template<
typename Str
ides,
typename XprType>
53 template<
typename Str
ides,
typename XprType>
84 template<
typename Str
ides,
typename ArgType,
typename Device>
// Fragment of a constructor: m_impl is built from op.expression() and device,
// then m_dimensions, m_outputStrides and m_inputStrides are filled in.
// NOTE(review): the embedded line numbers skip, so several statements (the
// declaration of input_dims and whatever selects between the two stride
// computations below) are not visible in this view.
113 : m_impl(op.expression(), device)
115 m_dimensions = m_impl.dimensions();
// Visits every dimension; the loop body is not visible in this view.
116 for (
int i = 0;
i < NumDims; ++
i) {
// First variant: index 0 gets stride 1 and strides are accumulated upwards.
// NOTE(review): presumably the ColMajor case -- confirm against the full file.
122 m_outputStrides[0] = 1;
123 m_inputStrides[0] = 1;
124 for (
int i = 1;
i < NumDims; ++
i) {
// Output stride i is output stride i-1 times m_dimensions[i-1].
125 m_outputStrides[
i] = m_outputStrides[
i-1] * m_dimensions[
i-1];
// Input stride i is derived from the still-unscaled entry i-1 and input_dims[i-1] ...
126 m_inputStrides[
i] = m_inputStrides[
i-1] * input_dims[
i-1];
// ... and only afterwards is entry i-1 multiplied by op.strides()[i-1].
// The order of these two statements matters.
127 m_inputStrides[
i-1] *= op.
strides()[
i-1];
// The loop scales entries 0..NumDims-2 only; the last entry is scaled here.
129 m_inputStrides[NumDims-1] *= op.
strides()[NumDims-1];
// Second variant: the mirror image, starting from index NumDims-1 and walking down.
// NOTE(review): presumably the RowMajor case -- confirm against the full file.
131 m_outputStrides[NumDims-1] = 1;
132 m_inputStrides[NumDims-1] = 1;
133 for (
int i = NumDims - 2;
i >= 0; --
i) {
134 m_outputStrides[
i] = m_outputStrides[
i+1] * m_dimensions[
i+1];
// As above: compute entry i from the unscaled entry i+1, then scale entry i+1.
135 m_inputStrides[
i] = m_inputStrides[
i+1] * input_dims[
i+1];
136 m_inputStrides[
i+1] *= op.
strides()[
i+1];
// The loop scales entries NumDims-1..1 only; entry 0 is scaled here.
138 m_inputStrides[0] *= op.
strides()[0];
146 m_impl.evalSubExprsIfNeeded(
NULL);
155 return m_impl.coeff(srcCoeff(index));
// Fragment of a member template parameterised on LoadMode. The visible
// statements accumulate two input offsets (inputIndices[0] and
// inputIndices[1]) from two running indices (indices[0] and indices[1]).
// NOTE(review): the function header and the declarations of indices, idx0,
// idx1, values and PacketSize are on lines missing from this view.
158 template<
int LoadMode>
164 Index inputIndices[] = {0, 0};
// Variant walking dimensions from NumDims-1 down to 1; index 0 is handled after the loop.
168 for (
int i = NumDims - 1;
i > 0; --
i) {
// idx0/idx1 are defined on lines not shown.
// Each contributes idx * input stride to the input offset and removes
// idx * output stride from the running index.
171 inputIndices[0] += idx0 * m_inputStrides[
i];
172 inputIndices[1] += idx1 * m_inputStrides[
i];
173 indices[0] -= idx0 * m_outputStrides[
i];
174 indices[1] -= idx1 * m_outputStrides[
i];
// What is left of each running index is scaled by input stride 0.
176 inputIndices[0] +=
indices[0] * m_inputStrides[0];
177 inputIndices[1] +=
indices[1] * m_inputStrides[0];
// Variant walking dimensions from 0 up to NumDims-2; index NumDims-1 is handled after the loop.
180 for (
int i = 0;
i < NumDims - 1; ++
i) {
183 inputIndices[0] += idx0 * m_inputStrides[
i];
184 inputIndices[1] += idx1 * m_inputStrides[
i];
185 indices[0] -= idx0 * m_outputStrides[
i];
186 indices[1] -= idx1 * m_outputStrides[
i];
// What is left of each running index is scaled by input stride NumDims-1.
188 inputIndices[0] +=
indices[0] * m_inputStrides[NumDims-1];
189 inputIndices[1] +=
indices[1] * m_inputStrides[NumDims-1];
// Tests whether the two input offsets are exactly PacketSize - 1 apart.
// NOTE(review): the body of this branch is not visible; presumably a direct
// packet load from m_impl -- confirm.
191 if (inputIndices[1] - inputIndices[0] ==
PacketSize - 1) {
// Reads the coefficient at the first input offset into values[0].
// NOTE(review): the rest of this path is on lines not shown.
197 values[0] = m_impl.coeff(inputIndices[0]);
// Fragment of a cost estimate. compute_cost is (NumDims - 1) times the cost
// of one Index add, one Index multiply and one Index divide, plus one extra
// Index multiply.
// NOTE(review): the enclosing function header is on lines not shown.
209 double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
210 TensorOpCost::MulCost<Index>() +
211 TensorOpCost::DivCost<Index>()) +
212 TensorOpCost::MulCost<Index>();
// innerDim is 0 when Layout equals ColMajor, otherwise NumDims - 1.
216 const int innerDim = (
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor)) ? 0 : (NumDims - 1);
// The flag passed to the wrapped evaluator is true only when the caller's
// `vectorized` is true and the input stride of innerDim is 1.
// NOTE(review): the right-hand operand of the trailing '+' is not visible here.
217 return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
224 #ifdef EIGEN_USE_SYCL
// Fragment that converts `index` into inputIndex by peeling off one dimension
// at a time: idx = index / output stride, inputIndex gains idx * input stride,
// and index loses idx * output stride.
// NOTE(review): the function header and whatever selects between the two
// loops are on lines not shown; this is presumably the srcCoeff() helper
// called elsewhere in the file -- confirm.
233 Index inputIndex = 0;
// Variant walking dimensions from NumDims-1 down to 1.
236 for (
int i = NumDims - 1;
i > 0; --
i) {
237 const Index idx = index / m_outputStrides[
i];
238 inputIndex += idx * m_inputStrides[
i];
239 index -= idx * m_outputStrides[
i];
// The remainder left in index is scaled by input stride 0.
241 inputIndex += index * m_inputStrides[0];
// Variant walking dimensions from 0 up to NumDims-2.
244 for (
int i = 0;
i < NumDims - 1; ++
i) {
245 const Index idx = index / m_outputStrides[
i];
246 inputIndex += idx * m_inputStrides[
i];
247 index -= idx * m_outputStrides[
i];
// The remainder left in index is scaled by input stride NumDims-1.
249 inputIndex += index * m_inputStrides[NumDims-1];
// Fragment of a class template over Strides, ArgType and Device that publicly
// derives from TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>.
// NOTE(review): the class name/specialisation line and most members are on
// lines missing from this view.
261 template<
typename Str
ides,
typename ArgType,
typename Device>
263 :
public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
// Constructor initialiser: forwards op and device to Base; the body is empty.
281 :
Base(op, device) { }
// Returns a reference obtained from the wrapped evaluator at the offset that
// srcCoeff(index) yields; both are reached through this->.
291 return this->m_impl.coeffRef(this->srcCoeff(index));
// Fragment of a packet write. The two input offsets are accumulated from the
// running indices with this->m_inputStrides and this->m_outputStrides.
// NOTE(review): the function header and the declarations of indices, idx0,
// idx1, values, x and PacketSize are on lines missing from this view.
300 Index inputIndices[] = {0, 0};
// Variant walking dimensions from NumDims-1 down to 1.
304 for (
int i = NumDims - 1;
i > 0; --
i) {
307 inputIndices[0] += idx0 * this->m_inputStrides[
i];
308 inputIndices[1] += idx1 * this->m_inputStrides[
i];
309 indices[0] -= idx0 * this->m_outputStrides[
i];
310 indices[1] -= idx1 * this->m_outputStrides[
i];
// What is left of each running index is scaled by input stride 0.
312 inputIndices[0] +=
indices[0] * this->m_inputStrides[0];
313 inputIndices[1] +=
indices[1] * this->m_inputStrides[0];
// Variant walking dimensions from 0 up to NumDims-2.
316 for (
int i = 0;
i < NumDims - 1; ++
i) {
319 inputIndices[0] += idx0 * this->m_inputStrides[
i];
320 inputIndices[1] += idx1 * this->m_inputStrides[
i];
321 indices[0] -= idx0 * this->m_outputStrides[
i];
322 indices[1] -= idx1 * this->m_outputStrides[
i];
// What is left of each running index is scaled by input stride NumDims-1.
324 inputIndices[0] +=
indices[0] * this->m_inputStrides[NumDims-1];
325 inputIndices[1] +=
indices[1] * this->m_inputStrides[NumDims-1];
// If the two input offsets are exactly PacketSize - 1 apart, x is handed to
// the wrapped evaluator as one Unaligned packet write at the first offset.
327 if (inputIndices[1] - inputIndices[0] ==
PacketSize - 1) {
328 this->m_impl.template writePacket<Unaligned>(inputIndices[0],
x);
// Otherwise x is stored into values via pstore and values[0] is written
// through coeffRef at the first offset.
// NOTE(review): the writes for the remaining elements are on lines not shown.
332 internal::pstore<Scalar, PacketReturnType>(
values,
x);
333 this->m_impl.coeffRef(inputIndices[0]) =
values[0];
346 #endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H