10 #define EIGEN_USE_THREADS 14 #include <Eigen/CXX11/Tensor> 26 template <
typename Dst,
typename Expr>
39 typename Dst,
typename Expr>
43 Vectorizable, Tiling>;
48 template <
int NumDims>
51 for (
int i = 0;
i < NumDims; ++
i) {
52 dims[
i] = internal::random<int>(min_dim, max_dim);
57 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
61 static constexpr
int Options = 0 | Layout;
65 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
71 const auto expr = src.square();
75 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
85 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
89 static constexpr
int Options = 0 | Layout;
93 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
102 const auto expr = lhs + rhs;
106 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
111 T sum = lhs.coeff(
i) + rhs.coeff(
i);
116 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
120 static constexpr
int Options = 0 | Layout;
122 auto dims = RandomDims<NumDims>(1, 10);
126 const auto broadcasts = RandomDims<NumDims>(1, 7);
127 const auto expr = src.broadcast(broadcasts);
139 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
143 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
148 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
152 auto dims = RandomDims<NumDims>(1, 10);
156 #define TEST_CHIPPING(CHIP_DIM) \ 157 if (NumDims > (CHIP_DIM)) { \ 158 const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \ 159 const auto expr = src.template chip<(CHIP_DIM)>(offset); \ 161 Tensor<T, NumDims - 1, Layout, Index> golden; \ 164 Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions()); \ 166 using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; \ 167 using Executor = internal::TensorExecutor<const Assign, Device, \ 168 Vectorizable, Tiling>; \ 170 Executor::run(Assign(dst, expr), d); \ 172 for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \ 173 VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \ 187 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
191 auto dims = RandomDims<NumDims>(1, 10);
193 #define TEST_CHIPPING(CHIP_DIM) \ 194 if (NumDims > (CHIP_DIM)) { \ 196 array<Index, NumDims - 1> src_dims; \ 197 for (int i = 0; i < NumDims - 1; ++i) { \ 198 int dim = i < (CHIP_DIM) ? i : i + 1; \ 199 src_dims[i] = dims[dim]; \ 202 Tensor<T, NumDims - 1, Layout, Index> src(src_dims); \ 205 const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \ 207 Tensor<T, NumDims, Layout, Index> random(dims); \ 210 Tensor<T, NumDims, Layout, Index> golden(dims); \ 212 golden.template chip<(CHIP_DIM)>(offset) = src; \ 214 Tensor<T, NumDims, Layout, Index> dst(dims); \ 216 auto expr = dst.template chip<(CHIP_DIM)>(offset); \ 218 using Assign = TensorAssignOp<decltype(expr), const decltype(src)>; \ 219 using Executor = internal::TensorExecutor<const Assign, Device, \ 220 Vectorizable, Tiling>; \ 222 Executor::run(Assign(expr, src), d); \ 224 for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \ 225 VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \ 239 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
243 static constexpr
int Options = 0 | Layout;
245 auto dims = RandomDims<NumDims>(1, 10);
250 for (
int i = 0;
i < NumDims; ++
i) shuffle[
i] =
i;
255 for (
int i = 0;
i < NumDims; ++
i) {
256 shuffled_dims[
i] = dims[shuffle[
i]];
259 const auto expr = src.shuffle(shuffle);
268 DeviceAssign<Vectorizable, Tiling>(
d, dst, expr);
270 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
274 }
while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
277 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
281 static constexpr
int Options = 0 | Layout;
283 auto dims = RandomDims<NumDims>(5, 10);
288 for (
int i = 0;
i < NumDims; ++
i) shuffle[
i] =
i;
293 for (
int i = 0;
i < NumDims; ++
i) shuffled_dims[shuffle[
i]] = dims[
i];
298 auto golden_shuffle = golden.
shuffle(shuffle);
303 auto dst_shuffle = dst.
shuffle(shuffle);
304 DeviceAssign<Vectorizable, Tiling>(
d, dst_shuffle, src);
310 }
while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
313 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
317 static_assert(NumDims >= 2,
"NumDims must be greater or equal than 2");
319 static constexpr
int ReshapedDims = NumDims - 1;
320 static constexpr
int Options = 0 | Layout;
322 auto dims = RandomDims<NumDims>(5, 10);
327 std::vector<Index> shuffle;
328 for (
int i = 0;
i < ReshapedDims; ++
i) shuffle.push_back(
i);
329 std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
332 reshaped_dims[shuffle[0]] = dims[0] * dims[1];
333 for (
int i = 1;
i < ReshapedDims; ++
i) reshaped_dims[shuffle[
i]] = dims[
i + 1];
340 auto expr = src.
reshape(reshaped_dims);
344 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
348 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
353 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
357 static_assert(NumDims >= 2,
"NumDims must be greater or equal than 2");
358 static constexpr
int Options = 0 | Layout;
360 auto dims = RandomDims<NumDims>(5, 10);
369 for (
int i = 0;
i < NumDims; ++
i) {
371 slice_size[
i] =
numext::mini(slice_size[i], dims[i] - slice_start[i]);
375 src.
slice(slice_start, slice_size);
380 auto expr = src.
slice(slice_start, slice_size);
384 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
388 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
393 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
397 static_assert(NumDims >= 2,
"NumDims must be greater or equal than 2");
398 static constexpr
int Options = 0 | Layout;
400 auto dims = RandomDims<NumDims>(5, 10);
409 for (
int i = 0;
i < NumDims; ++
i) {
411 slice_size[
i] =
numext::mini(slice_size[i], dims[i] - slice_start[i]);
419 golden.
slice(slice_start, slice_size) = slice;
423 auto expr = dst.
slice(slice_start, slice_size);
427 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
436 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
440 static constexpr
int Options = 0 | Layout;
442 auto dims = RandomDims<NumDims>(1, 10);
446 const auto broadcasts = RandomDims<NumDims>(1, 7);
447 const auto expr = src.square().eval().broadcast(broadcasts);
459 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
463 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
468 template<
typename T,
int NumDims>
473 for (
int i = 0;
i < NumDims; ++
i) {
474 result +=
static_cast<T>((
i + 1) * dims[
i]);
480 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
484 static constexpr
int Options = 0 | Layout;
486 auto dims = RandomDims<NumDims>(20, 30);
502 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
506 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
511 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
515 static constexpr
int Options = 0 | Layout;
517 auto dims = RandomDims<NumDims>(1,
numext::pow(1000000.0, 1.0 / NumDims));
523 for (
int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
525 const auto expr = src.reverse(reverse);
537 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
541 for (
Index i = 0;
i < dst.dimensions().TotalSize(); ++
i) {
546 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
550 static constexpr
int Options = 0 | Layout;
554 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
560 const auto expr = src.square();
563 auto on_done = [&done]() { done.
Notify(); };
566 using DoneCallback = decltype(on_done);
567 using Executor = internal::TensorAsyncExecutor<
const Assign, Device, DoneCallback,
568 Vectorizable, Tiling>;
570 Executor::runAsync(Assign(dst, expr), d, on_done);
579 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
583 static constexpr
int Options = 0 | Layout;
587 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
596 const auto expr = lhs + rhs;
599 auto on_done = [&done]() { done.
Notify(); };
602 using DoneCallback = decltype(on_done);
603 using Executor = internal::TensorAsyncExecutor<
const Assign, Device, DoneCallback,
604 Vectorizable, Tiling>;
606 Executor::runAsync(Assign(dst, expr), d, on_done);
610 T sum = lhs.coeff(
i) + rhs.coeff(
i);
615 #ifdef EIGEN_DONT_VECTORIZE 616 #define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL 618 #define VECTORIZABLE(VAL) VAL 621 #define CALL_SUBTEST_PART(PART) \ 624 #define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ 625 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \ 626 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \ 627 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \ 628 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \ 629 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \ 630 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \ 631 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \ 632 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \ 633 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \ 634 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \ 635 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \ 636 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \ 637 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \ 638 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \ 639 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \ 640 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device))) 643 #define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ 644 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \ 645 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \ 646 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \ 647 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \ 648 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \ 649 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \ 650 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \ 651 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device))) 658 const auto num_threads = internal::random<int>(20, 24);
660 Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
static void test_execute_unary_expr(Device d)
static void DefaultAssign(Dst &dst, Expr expr)
#define EIGEN_ALWAYS_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar & coeff(const array< Index, NumIndices > &indices) const
static void test_execute_generator_op(Device d)
static void test_async_execute_unary_expr(Device d)
static void test_execute_broadcasting_of_forced_eval(Device d)
static void test_execute_slice_lvalue(Device d)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
static void test_execute_chipping_rvalue(Device d)
static void test_async_execute_binary_expr(Device d)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReshapingOp< const NewDimensions, const Derived > reshape(const NewDimensions &newDimensions) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp< const Shuffle, const Tensor< Scalar_, NumIndices_, Options_, IndexType_ > > shuffle(const Shuffle &shfl) const
EIGEN_DEVICE_FUNC const SquareReturnType square() const
#define VERIFY_IS_EQUAL(a, b)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const
static void test_execute_reshape(Device d)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
#define TEST_CHIPPING(CHIP_DIM)
Eigen::Triplet< double > T
EIGEN_DECLARE_TEST(cxx11_tensor_executor)
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
static void test_execute_reverse_rvalue(Device d)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorSlicingOp< const StartIndices, const Sizes, const Derived > slice(const StartIndices &startIndices, const Sizes &sizes) const
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
#define EIGEN_DEVICE_FUNC
static array< Index, NumDims > RandomDims(int min_dim=1, int max_dim=20)
void reverse(const MatrixType &m)
static void DeviceAssign(Device &d, Dst &dst, Expr expr)
static void test_execute_binary_expr(Device d)
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
static void test_execute_broadcasting(Device d)
Jet< T, N > pow(const Jet< T, N > &f, double g)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T operator()(const array< Index, NumDims > &dims) const
static void test_execute_slice_rvalue(Device d)
#define EIGEN_UNUSED_VARIABLE(var)
static void test_execute_shuffle_rvalue(Device d)
static void test_execute_chipping_lvalue(Device d)
static void test_execute_shuffle_lvalue(Device d)