#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H

namespace Eigen {

/** \class TensorGeneratorOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor generator class.
  */
namespace internal {

template<typename Generator, typename XprType>
struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Generator, typename XprType>
struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense>
{
  typedef const TensorGeneratorOp<Generator, XprType>& type;
};

template<typename Generator, typename XprType>
struct nested<TensorGeneratorOp<Generator, XprType>, 1,
              typename eval<TensorGeneratorOp<Generator, XprType> >::type>
{
  typedef TensorGeneratorOp<Generator, XprType> type;
};

}  // end namespace internal
template<typename Generator, typename XprType>
class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator)
        : m_xpr(expr), m_generator(generator) {}

    EIGEN_DEVICE_FUNC
    const Generator& generator() const { return m_generator; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Generator m_generator;
};
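// Usage sketch (added for illustration; `IotaGenerator` is hypothetical, not
// part of Eigen): a TensorGeneratorOp is normally created through
// TensorBase::generate(). The generator is any functor that maps an array of
// coordinates to a coefficient value:
//
//   struct IotaGenerator {
//     float operator()(const Eigen::array<Eigen::Index, 2>& coords) const {
//       // Value depends only on the coordinates, e.g. row * 10 + col.
//       return static_cast<float>(coords[0] * 10 + coords[1]);
//     }
//   };
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   Eigen::Tensor<float, 2> filled = t.generate(IotaGenerator());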
// Eval as rvalue
template<typename Generator, typename ArgType, typename Device>
struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
{
  typedef TensorGeneratorOp<Generator, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  static const int NumDims = internal::array_size<Dimensions>::value;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  enum {
    IsAligned         = false,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = true,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device), m_generator(op.generator())
  {
    TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
    m_dimensions = argImpl.dimensions();

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
        if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
        if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
  }
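  // Worked example (added; values are illustrative): for a col-major 2x3x4
  // tensor the loop above produces m_strides = {1, 2, 6}, so coordinates
  // (i, j, k) map to linear index i + 2*j + 6*k. m_fast_strides caches a
  // TensorIntDivisor per stride so that extract_coordinates() can invert
  // this mapping without expensive integer divisions.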
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    array<Index, NumDims> coords;
    extract_coordinates(index, coords);
    return m_generator(coords);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }
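  // Note (added for clarity): the generator is an arbitrary user functor, so
  // the generation itself cannot be vectorized. packet() therefore fills an
  // aligned scratch buffer one coefficient at a time and loads the result as
  // a single packet.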
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    const size_t target_size = m_device.firstLevelCacheSize();
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
        target_size);
  }

  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    static const bool is_col_major =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    // Compute spatial coordinates for the first block element.
    array<Index, NumDims> coords;
    extract_coordinates(desc.offset(), coords);
    array<Index, NumDims> initial_coords = coords;

    // Offset in the output block buffer.
    Index offset = 0;

    // Initialize output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = is_col_major ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      it[i].span = it[i].stride * (it[i].size - 1);
      it[i].count = 0;
    }
    eigen_assert(it[0].stride == 1);
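    // Note (added; sizes are illustrative): it[] is ordered inner-most first
    // regardless of Layout. For a row-major 2x3 block, desc.dimension(1) == 3
    // becomes it[0] (size 3, stride 1) and desc.dimension(0) == 2 becomes
    // it[1] (size 2, stride 3).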
    // Prepare storage for the materialized generator result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    CoeffReturnType* block_buffer = block_storage.data();

    static const int packet_size = PacketType<CoeffReturnType, Device>::size;

    static const int inner_dim = is_col_major ? 0 : NumDims - 1;
    const Index inner_dim_size = it[0].size;
    const Index inner_dim_vectorized = inner_dim_size - packet_size;

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      Index i = 0;
      // Generate data for the vectorized part of the inner-most dimension.
      for (; i <= inner_dim_vectorized; i += packet_size) {
        for (Index j = 0; j < packet_size; ++j) {
          array<Index, NumDims> j_coords = coords;  // Break loop dependence.
          j_coords[inner_dim] += j;
          *(block_buffer + offset + i + j) = m_generator(j_coords);
        }
        coords[inner_dim] += packet_size;
      }
      // Finalize the non-vectorized part of the inner-most dimension.
      for (; i < inner_dim_size; ++i) {
        *(block_buffer + offset + i) = m_generator(coords);
        coords[inner_dim]++;
      }
      coords[inner_dim] = initial_coords[inner_dim];

      // For a 1d tensor we only need to generate the inner-most dimension.
      if (NumDims == 1) break;
      // Update the output offset and the coordinates.
      for (i = 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          offset += it[i].stride;
          coords[is_col_major ? i : NumDims - 1 - i]++;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        coords[is_col_major ? i : NumDims - 1 - i] =
            initial_coords[is_col_major ? i : NumDims - 1 - i];
        offset -= it[i].span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
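  // Note (added; the shape is illustrative): the update loop above acts like
  // an odometer over the non-inner dimensions. For a col-major 4x2x2 block it
  // writes a 4-element inner column, then advances dim 1 (offset += 4); once
  // dim 1 wraps, it rewinds that dimension (offset -= span == 4) and carries
  // into dim 2 (offset += 8).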
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
    return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() +
                              TensorOpCost::MulCost<Scalar>());
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const {}
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[0] = index;
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[NumDims - 1] = index;
    }
  }
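  // Worked example (added; values are illustrative): for a col-major 2x3
  // tensor, m_strides = {1, 2}. Extracting index 5: idx = 5 / 2 = 2 gives
  // coords[1] = 2, and the remainder 1 gives coords[0] = 1, i.e. the
  // coordinates (1, 2).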
  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fast_strides;
  Generator m_generator;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H