10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
// --- Fragment: internal::traits<TensorReshapingOp<NewDimensions, XprType>> ---
// Only part of the traits specialization is visible in this excerpt.
23 template<
typename NewDimensions,
typename XprType>
// Nested forwards the argument expression's nesting type, so the reshape
// expression is stored by value or by reference as XprType dictates.
30 typedef typename XprType::Nested
Nested;
// The reshape keeps the argument expression's storage layout.
33 static const int Layout = XprTraits::Layout;
// --- Fragments: helper template specializations for TensorReshapingOp
// (presumably eval<> and nested<>; their bodies are not visible here). ---
36 template<
typename NewDimensions,
typename XprType>
42 template<
typename NewDimensions,
typename XprType>
// --- Fragment: the TensorReshapingOp expression class. ---
52 template<
typename NewDimensions,
typename XprType>
// Fragment of an operator= implementation: builds a TensorAssignOp
// ("assign") from *this and the right-hand side; the surrounding code
// that executes the assignment is not visible in this excerpt.
76 Assign assign(*
this, other);
// Same pattern for the templated operator=(const OtherDerived&).
81 template<
typename OtherDerived>
86 Assign assign(*
this, other);
// --- Fragment: TensorEvaluator<const TensorReshapingOp<...>, Device>. ---
// A reshape does not move any data, so the evaluator forwards coefficient
// and packet reads to the argument evaluator at the same linear index.
98 template<
typename NewDimensions,
typename ArgType,
typename Device>
// coeff(): direct pass-through to the argument evaluator.
136 return m_impl.coeff(index);
139 template<
int LoadMode>
// packet(): also a pass-through — the linear memory layout is unchanged
// by the reshape, so the vectorized load stays valid.
142 return m_impl.template packet<LoadMode>(index);
// costPerCoeff(): the reshape itself adds no per-coefficient work.
146 return m_impl.costPerCoeff(vectorized);
// --- Fragment: the writable evaluator for TensorReshapingOp.  It derives
// from the read-only (const) evaluator and adds mutating access, again
// forwarding straight through to the argument evaluator. ---
160 template<
typename NewDimensions,
typename ArgType,
typename Device>
162 :
public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
// coeffRef(): writable access at the unchanged linear index.
188 return this->
m_impl.coeffRef(index);
// writePacket(): vectorized store, forwarded unchanged.
193 this->
m_impl.template writePacket<StoreMode>(index,
x);
// --- Fragment: internal::traits<TensorSlicingOp<StartIndices, Sizes,
// XprType>> (only partially visible). ---
206 template<
typename StartIndices,
typename Sizes,
typename XprType>
// A slice keeps the argument expression's storage layout.
216 static const int Layout = XprTraits::Layout;
// --- Fragments: helper template specializations and the TensorSlicingOp
// expression class (bodies largely not visible in this excerpt). ---
219 template<
typename StartIndices,
typename Sizes,
typename XprType>
225 template<
typename StartIndices,
typename Sizes,
typename XprType>
235 template<
typename StartIndices,
typename Sizes,
typename XprType>
// Fragments of two operator= implementations: each builds a
// TensorAssignOp from *this and the right-hand side (the code that
// executes the assignment is not visible here).
257 template<
typename OtherDerived>
262 Assign assign(*
this, other);
271 Assign assign(*
this, other);
// Heuristic deciding whether a contiguous run of slice values is large
// enough to be copied with a single device memcpy instead of
// coefficient by coefficient.  On generic devices the threshold scales
// with the thread count (2 * numThreads()).
// NOTE(review): the `threshold_` member declaration and the struct's
// closing brace fall outside the visible span of this excerpt.
286 template <
typename Index,
typename Device>
struct MemcpyTriggerForSlicing {
287 EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(
const Device& device) :
threshold_(2 * device.numThreads()) { }
// Returns true when `val` contiguous values exceed the threshold.
288 EIGEN_DEVICE_FUNC
bool operator ()(
Index val)
const {
return val >
threshold_; }
// GPU specialization: the trigger fires only for runs longer than a
// fixed 4*1024*1024 — a count of contiguous values, not bytes —
// presumably because small device memcpys are not worthwhile on GPUs
// (TODO confirm rationale; closing brace is outside this excerpt).
297 template <
typename Index>
struct MemcpyTriggerForSlicing<
Index, GpuDevice> {
298 EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(
const GpuDevice&) { }
299 EIGEN_DEVICE_FUNC
bool operator ()(
Index val)
const {
return val > 4*1024*1024; }
// --- Fragment: TensorEvaluator<const TensorSlicingOp<...>, Device>
// constructor — precomputes input/output strides. ---
305 template<
typename StartIndices,
typename Sizes,
typename ArgType,
typename Device>
// Presumably inside the constructor: iterate over every dimension.
324 for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
// Forward stride build-up: the first dimension is innermost (stride 1).
// This looks like the ColMajor branch — the selecting `if` is not
// visible in this excerpt, TODO confirm.
331 m_inputStrides[0] = 1;
332 for (
int i = 1; i < NumDims; ++i) {
333 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
337 m_outputStrides[0] = 1;
338 for (
int i = 1; i < NumDims; ++i) {
339 m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
// Backward stride build-up: the last dimension is innermost (stride 1)
// — the mirror-image branch for the other storage order.
343 m_inputStrides[NumDims-1] = 1;
344 for (
int i = NumDims - 2; i >= 0; --i) {
345 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
349 m_outputStrides[NumDims-1] = 1;
350 for (
int i = NumDims - 2; i >= 0; --i) {
351 m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
// --- Fragments: evalSubExprsIfNeeded() and coeff() of the slicing
// evaluator. ---
// Let the argument expression evaluate itself first (no destination
// buffer is handed down here).
367 m_impl.evalSubExprsIfNeeded(NULL);
// Count how many slice values are contiguous in the input, to decide
// whether a bulk memcpy is profitable.
369 Index contiguous_values = 1;
371 for (
int i = 0; i < NumDims; ++i) {
378 for (
int i = NumDims-1; i >= 0; --i) {
// Device-dependent heuristic (MemcpyTriggerForSlicing, defined earlier
// in this file).
386 const MemcpyTriggerForSlicing<Index, Device> trigger(
m_device);
387 if (trigger(contiguous_values)) {
// Offset of the current contiguous run inside the input tensor.
390 Index offset = srcCoeff(i);
// coeff(): translate the output linear index to the corresponding
// input linear index, then read from the argument evaluator.
405 return m_impl.coeff(srcCoeff(index));
// --- Fragment: packet() of the slicing evaluator.  Computes the input
// indices of the first and last coefficient of the packet; when those
// are contiguous a vectorized load is used (not visible here),
// otherwise the packet is gathered scalar by scalar. ---
408 template<
int LoadMode>
415 Index inputIndices[] = {0, 0};
416 Index indices[] = {index, index + packetSize - 1};
// Decompose the linear indices one dimension at a time, from the
// outermost down to dimension 1: recover the coordinate with a fast
// division, shift it by the slice offset, rescale by the input stride.
// (The layout-selecting `if` is not visible — TODO confirm branches.)
418 for (
int i = NumDims - 1; i > 0; --i) {
419 const Index idx0 = indices[0] / m_fastOutputStrides[i];
420 const Index idx1 = indices[1] / m_fastOutputStrides[i];
421 inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
422 inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
423 indices[0] -= idx0 * m_outputStrides[i];
424 indices[1] -= idx1 * m_outputStrides[i];
// The remaining innermost dimension has stride 1; just add its offset.
426 inputIndices[0] += (indices[0] + m_offsets[0]);
427 inputIndices[1] += (indices[1] + m_offsets[0]);
// Mirror-image decomposition for the other storage order.
429 for (
int i = 0; i < NumDims - 1; ++i) {
430 const Index idx0 = indices[0] / m_fastOutputStrides[i];
431 const Index idx1 = indices[1] / m_fastOutputStrides[i];
432 inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
433 inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
434 indices[0] -= idx0 * m_outputStrides[i];
435 indices[1] -= idx1 * m_outputStrides[i];
437 inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
438 inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
// Fast path: the whole packet maps onto contiguous input memory.
440 if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
// Slow path: gather.  First/last coefficients reuse the indices already
// computed; interior coefficients go through coeff().
446 values[0] =
m_impl.coeff(inputIndices[0]);
447 values[packetSize-1] =
m_impl.coeff(inputIndices[1]);
448 for (
int i = 1; i < packetSize-1; ++i) {
449 values[i] =
coeff(index+i);
// --- Fragment: offset computation, presumably from data(), which can
// return a direct pointer into the input when the slice layout allows
// it. ---
// For each dimension where the output extent differs from the input
// extent (i.e. the dimension is actually sliced), accumulate the
// slice's start offset; the inner loop inspects the remaining
// dimensions (its full body is not visible in this excerpt).
466 for (
int i = 0; i < NumDims; ++i) {
467 if (m_dimensions[i] !=
m_impl.dimensions()[i]) {
468 offset += m_offsets[i] * m_inputStrides[i];
469 for (
int j = i+1; j < NumDims; ++j) {
470 if (m_dimensions[j] > 1) {
473 offset += m_offsets[j] * m_inputStrides[j];
// Mirror-image scan for the other storage order (last dimension first).
479 for (
int i = NumDims - 1; i >= 0; --i) {
480 if (m_dimensions[i] !=
m_impl.dimensions()[i]) {
481 offset += m_offsets[i] * m_inputStrides[i];
482 for (
int j = i-1; j >= 0; --j) {
483 if (m_dimensions[j] > 1) {
486 offset += m_offsets[j] * m_inputStrides[j];
// Base pointer plus the accumulated start offset.
492 return result + offset;
// --- Fragment: srcCoeff() — maps an output-space linear index to the
// corresponding input-space linear index. ---
500 Index inputIndex = 0;
// One loop per storage order (the selecting `if` is not visible):
// repeatedly divide by the output stride to recover each coordinate,
// shift it by the slice's start offset, and rescale by the input
// stride; the remainder feeds the next (inner) dimension.
502 for (
int i = NumDims - 1; i > 0; --i) {
503 const Index idx = index / m_fastOutputStrides[i];
504 inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
505 index -= idx * m_outputStrides[i];
507 inputIndex += (index + m_offsets[0]);
509 for (
int i = 0; i < NumDims - 1; ++i) {
510 const Index idx = index / m_fastOutputStrides[i];
511 inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
512 index -= idx * m_outputStrides[i];
514 inputIndex += (index + m_offsets[NumDims-1]);
// --- Fragment: the writable evaluator for TensorSlicingOp.  Derives
// from the read-only evaluator and adds coeffRef()/writePacket(). ---
530 template<
typename StartIndices,
typename Sizes,
typename ArgType,
typename Device>
532 :
public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
// coeffRef(): writable access through the same index translation as
// the read path.
558 return this->
m_impl.coeffRef(this->srcCoeff(index));
// writePacket(): computes the input indices of the packet's first and
// last coefficient, exactly mirroring the const evaluator's packet().
565 Index inputIndices[] = {0, 0};
566 Index indices[] = {index, index + packetSize - 1};
568 for (
int i = NumDims - 1; i > 0; --i) {
569 const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
570 const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
571 inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
572 inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
573 indices[0] -= idx0 * this->m_outputStrides[i];
574 indices[1] -= idx1 * this->m_outputStrides[i];
576 inputIndices[0] += (indices[0] + this->m_offsets[0]);
577 inputIndices[1] += (indices[1] + this->m_offsets[0]);
// Mirror-image decomposition for the other storage order.
579 for (
int i = 0; i < NumDims - 1; ++i) {
580 const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
581 const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
582 inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
583 inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
584 indices[0] -= idx0 * this->m_outputStrides[i];
585 indices[1] -= idx1 * this->m_outputStrides[i];
587 inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
588 inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
// Fast path: the packet maps onto contiguous input memory — store it
// with a single vectorized write.
590 if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
591 this->
m_impl.template writePacket<StoreMode>(inputIndices[0],
x);
// Slow path: spill the packet to a scalar buffer, then store piecewise.
595 internal::pstore<CoeffReturnType, PacketReturnType>(values,
x);
596 this->
m_impl.coeffRef(inputIndices[0]) = values[0];
597 this->
m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
598 for (
int i = 1; i < packetSize-1; ++i) {
599 this->
coeffRef(index+i) = values[i];
// --- Fragment: internal::traits<TensorStridingSlicingOp<...>> (only
// partially visible in this excerpt). ---
608 template<
typename StartIndices,
typename StopIndices,
// Rejoined the identifier "Strides", which had been split across two
// lines ("Str" / "ides") and would not tokenize as C++.
typename Strides,
typename XprType>
// A strided slice keeps the argument expression's storage layout.
618 static const int Layout = XprTraits::Layout;
// --- Fragments: helper template specializations and the
// TensorStridingSlicingOp expression class (bodies largely not visible
// in this excerpt).  In each parameter list below the identifier
// "Strides" had been split across two lines ("Str" / "ides") and has
// been rejoined so the declarations tokenize as valid C++. ---
621 template<
typename StartIndices,
typename StopIndices,
typename Strides,
typename XprType>
627 template<
typename StartIndices,
typename StopIndices,
typename Strides,
typename XprType>
636 template<
typename StartIndices,
typename StopIndices,
typename Strides,
typename XprType>
// Fragments of two operator= implementations: each builds a
// TensorAssignOp from *this and the right-hand side (the code that
// executes the assignment is not visible here).
667 Assign assign(*
this, other);
673 template<
typename OtherDerived>
678 Assign assign(*
this, other);
// --- Fragment: TensorEvaluator<const TensorStridingSlicingOp<...>,
// Device> constructor — validates the strides, clamps the requested
// start/stop indices, derives the output dimensions, and precomputes
// strides and offsets.  The identifier "Strides" below had been split
// across two source lines and has been rejoined. ---
692 template<
typename StartIndices,
typename StopIndices,
typename Strides,
typename ArgType,
typename Device>
// A stride of zero can never advance through the tensor.
713 for (
size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
714 eigen_assert(m_strides[i] != 0 &&
"0 stride is invalid");
723 m_startIndices[i] = startIndicesClamped[i];
// Detect degenerate (empty) slices.  Dropped the stray second
// semicolon (`false;;`) that followed the initializer.
729 bool degenerate =
false;
730 for(
int i = 0; i < NumDims; i++){
731 Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
// Empty interval, or a stride pointing away from the stop index.
732 if(interval == 0 || ((interval<0) != (m_strides[i]<0))){
// ceil(interval / stride): a partial final step still yields a value.
736 m_dimensions[i] = interval / m_strides[i]
737 + (interval % m_strides[i] != 0 ? 1 : 0);
741 Strides output_dims = m_dimensions;
// Forward build-up for one storage order (the selecting `if` is not
// visible): input strides fold in the per-dimension step m_strides,
// offsets fold in the clamped start index times the dimension product.
744 m_inputStrides[0] = m_strides[0];
745 m_offsets[0] = startIndicesClamped[0];
746 Index previousDimProduct = 1;
747 for (
int i = 1; i < NumDims; ++i) {
748 previousDimProduct *= input_dims[i-1];
749 m_inputStrides[i] = previousDimProduct * m_strides[i];
750 m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
754 m_outputStrides[0] = 1;
755 for (
int i = 1; i < NumDims; ++i) {
756 m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
// Mirror-image build-up for the other storage order.
761 m_inputStrides[NumDims-1] = m_strides[NumDims-1];
762 m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
763 Index previousDimProduct = 1;
764 for (
int i = NumDims - 2; i >= 0; --i) {
765 previousDimProduct *= input_dims[i+1];
766 m_inputStrides[i] = previousDimProduct * m_strides[i];
767 m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
770 m_outputStrides[NumDims-1] = 1;
771 for (
int i = NumDims - 2; i >= 0; --i) {
772 m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
// Block size cap derived from the last-level cache size (at least 1).
777 m_block_total_size_max =
numext::maxi(
static_cast<std::size_t
>(1),
778 device.lastLevelCacheSize() /
// --- Fragments: evalSubExprsIfNeeded(), coeff() and srcCoeff() of the
// striding-slicing evaluator. ---
793 m_impl.evalSubExprsIfNeeded(NULL);
// coeff(): translate to the input linear index, then read through.
803 return m_impl.coeff(srcCoeff(index));
// srcCoeff(): like the plain slice version, but here m_offsets already
// carries the start index scaled by the dimension product, so it is
// added per dimension rather than multiplied by the input stride.
817 Index inputIndex = 0;
819 for (
int i = NumDims - 1; i >= 0; --i) {
820 const Index idx = index / m_fastOutputStrides[i];
821 inputIndex += idx * m_inputStrides[i] + m_offsets[i];
822 index -= idx * m_outputStrides[i];
// Mirror-image loop for the other storage order (selecting `if` is
// not visible in this excerpt).
825 for (
int i = 0; i < NumDims; ++i) {
826 const Index idx = index / m_fastOutputStrides[i];
827 inputIndex += idx * m_inputStrides[i] + m_offsets[i];
828 index -= idx * m_outputStrides[i];
// --- Fragment: the writable evaluator for TensorStridingSlicingOp.
// Derives from the read-only evaluator; only coeffRef() is visible.
// The identifier "Strides" below had been split across two source
// lines ("Str" / "ides") and has been rejoined so the parameter list
// tokenizes as valid C++. ---
851 template<
typename StartIndices,
typename StopIndices,
typename Strides,
typename ArgType,
typename Device>
853 :
public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
// coeffRef(): writable access through the shared srcCoeff translation.
881 return this->
m_impl.coeffRef(this->srcCoeff(index));
888 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H