10 #define EIGEN_USE_THREADS
14 #include <Eigen/CXX11/Tensor>
26 template <
typename Dst,
typename Expr>
39 typename Dst,
typename Expr>
43 Vectorizable, Tiling>;
48 template <
int NumDims>
51 for (
int i = 0;
i < NumDims; ++
i) {
52 dims[
i] = internal::random<int>(min_dim, max_dim);
57 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
61 static constexpr
int Options = 0 | Layout;
65 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
71 const auto expr = src.square();
73 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
75 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
85 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
89 static constexpr
int Options = 0 | Layout;
93 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
102 const auto expr = lhs + rhs;
104 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
106 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
111 T sum = lhs.
coeff(
i) + rhs.coeff(
i);
116 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
120 static constexpr
int Options = 0 | Layout;
122 auto dims = RandomDims<NumDims>(1, 10);
126 const auto broadcasts = RandomDims<NumDims>(1, 7);
127 const auto expr = src.broadcast(broadcasts);
137 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
139 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
148 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
152 auto dims = RandomDims<NumDims>(1, 10);
// TEST_CHIPPING(CHIP_DIM): exercises chipping used as an rvalue expression.
// When NumDims > CHIP_DIM it draws a random offset in [0, dims[CHIP_DIM] - 1],
// builds `src.chip<CHIP_DIM>(offset)`, runs the assignment of that expression
// into `dst` (sized from `golden`) through internal::TensorExecutor with the
// enclosing Device/Vectorizable/Tiling parameters, and then checks every
// coefficient of `dst` against `golden`.
// NOTE(review): expects `src`, `dims`, `d` and the template parameters of the
// enclosing function to be in scope; how `golden` is filled is not shown in
// the lines reproduced here -- confirm against the full macro.
156 #define TEST_CHIPPING(CHIP_DIM) \
157 if (NumDims > (CHIP_DIM)) { \
158 const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
159 const auto expr = src.template chip<(CHIP_DIM)>(offset); \
161 Tensor<T, NumDims - 1, Layout, Index> golden; \
164 Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions()); \
166 using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; \
167 using Executor = internal::TensorExecutor<const Assign, Device, \
168 Vectorizable, Tiling>; \
170 Executor::run(Assign(dst, expr), d); \
172 for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
173 VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
187 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
191 auto dims = RandomDims<NumDims>(1, 10);
// TEST_CHIPPING(CHIP_DIM): exercises chipping used as an lvalue (assignment
// target). When NumDims > CHIP_DIM it:
//   - derives `src_dims` from `dims` by skipping dimension CHIP_DIM and
//     constructs a (NumDims - 1)-dimensional `src` with those dimensions;
//   - draws a random offset in [0, dims[CHIP_DIM] - 1];
//   - writes `src` into `golden.chip<CHIP_DIM>(offset)` with a plain
//     assignment to obtain the reference result;
//   - writes `src` into `dst.chip<CHIP_DIM>(offset)` through
//     internal::TensorExecutor on device `d`;
//   - checks every coefficient of `dst` against `golden`.
// NOTE(review): the initialisation of `random`, `golden` and `dst` is not
// visible in the lines reproduced here -- presumably `golden` and `dst` start
// from the same contents; confirm against the full macro.
193 #define TEST_CHIPPING(CHIP_DIM) \
194 if (NumDims > (CHIP_DIM)) { \
196 array<Index, NumDims - 1> src_dims; \
197 for (int i = 0; i < NumDims - 1; ++i) { \
198 int dim = i < (CHIP_DIM) ? i : i + 1; \
199 src_dims[i] = dims[dim]; \
202 Tensor<T, NumDims - 1, Layout, Index> src(src_dims); \
205 const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
207 Tensor<T, NumDims, Layout, Index> random(dims); \
210 Tensor<T, NumDims, Layout, Index> golden(dims); \
212 golden.template chip<(CHIP_DIM)>(offset) = src; \
214 Tensor<T, NumDims, Layout, Index> dst(dims); \
216 auto expr = dst.template chip<(CHIP_DIM)>(offset); \
218 using Assign = TensorAssignOp<decltype(expr), const decltype(src)>; \
219 using Executor = internal::TensorExecutor<const Assign, Device, \
220 Vectorizable, Tiling>; \
222 Executor::run(Assign(expr, src), d); \
224 for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
225 VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
239 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
243 static constexpr
int Options = 0 | Layout;
245 auto dims = RandomDims<NumDims>(1, 10);
250 for (
int i = 0;
i < NumDims; ++
i) shuffle[
i] =
i;
255 for (
int i = 0;
i < NumDims; ++
i) {
256 shuffled_dims[
i] = dims[shuffle[
i]];
259 const auto expr = src.
shuffle(shuffle);
268 DeviceAssign<Vectorizable, Tiling>(
d, dst, expr);
274 }
while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
277 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
281 static constexpr
int Options = 0 | Layout;
283 auto dims = RandomDims<NumDims>(5, 10);
288 for (
int i = 0;
i < NumDims; ++
i) shuffle[
i] =
i;
293 for (
int i = 0;
i < NumDims; ++
i) shuffled_dims[shuffle[
i]] = dims[
i];
298 auto golden_shuffle = golden.
shuffle(shuffle);
303 auto dst_shuffle = dst.
shuffle(shuffle);
304 DeviceAssign<Vectorizable, Tiling>(
d, dst_shuffle, src);
310 }
while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
313 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
317 static_assert(NumDims >= 2,
"NumDims must be greater or equal than 2");
319 static constexpr
int ReshapedDims = NumDims - 1;
320 static constexpr
int Options = 0 | Layout;
322 auto dims = RandomDims<NumDims>(5, 10);
327 std::vector<Index> shuffle;
328 for (
int i = 0;
i < ReshapedDims; ++
i) shuffle.push_back(
i);
329 std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
332 reshaped_dims[shuffle[0]] = dims[0] * dims[1];
333 for (
int i = 1;
i < ReshapedDims; ++
i) reshaped_dims[shuffle[
i]] = dims[
i + 1];
340 auto expr = src.
reshape(reshaped_dims);
342 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
344 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
353 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
357 static_assert(NumDims >= 2,
"NumDims must be greater or equal than 2");
358 static constexpr
int Options = 0 | Layout;
360 auto dims = RandomDims<NumDims>(5, 10);
369 for (
int i = 0;
i < NumDims; ++
i) {
375 src.
slice(slice_start, slice_size);
380 auto expr = src.
slice(slice_start, slice_size);
382 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
384 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
393 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
397 static_assert(NumDims >= 2,
"NumDims must be greater or equal than 2");
398 static constexpr
int Options = 0 | Layout;
400 auto dims = RandomDims<NumDims>(5, 10);
409 for (
int i = 0;
i < NumDims; ++
i) {
419 golden.
slice(slice_start, slice_size) =
slice;
423 auto expr = dst.
slice(slice_start, slice_size);
427 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
436 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
440 static constexpr
int Options = 0 | Layout;
442 auto dims = RandomDims<NumDims>(1, 10);
446 const auto broadcasts = RandomDims<NumDims>(1, 7);
447 const auto expr = src.square().eval().broadcast(broadcasts);
457 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
459 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
468 template<
typename T,
int NumDims>
473 for (
int i = 0;
i < NumDims; ++
i) {
474 result +=
static_cast<T>((
i + 1) * dims[
i]);
480 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
484 static constexpr
int Options = 0 | Layout;
486 auto dims = RandomDims<NumDims>(20, 30);
500 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
502 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
511 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
515 static constexpr
int Options = 0 | Layout;
517 auto dims = RandomDims<NumDims>(1,
numext::pow(1000000.0, 1.0 / NumDims));
523 for (
int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
535 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
537 internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
546 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
550 static constexpr
int Options = 0 | Layout;
554 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
560 const auto expr = src.square();
563 auto on_done = [&done]() { done.
Notify(); };
565 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
566 using DoneCallback = decltype(on_done);
567 using Executor = internal::TensorAsyncExecutor<
const Assign, Device, DoneCallback,
568 Vectorizable, Tiling>;
570 Executor::runAsync(Assign(dst, expr),
d, on_done);
579 template <
typename T,
int NumDims,
typename Device,
bool Vectorizable,
583 static constexpr
int Options = 0 | Layout;
587 auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
596 const auto expr = lhs + rhs;
599 auto on_done = [&done]() { done.
Notify(); };
601 using Assign =
TensorAssignOp<decltype(dst),
const decltype(expr)>;
602 using DoneCallback = decltype(on_done);
603 using Executor = internal::TensorAsyncExecutor<
const Assign, Device, DoneCallback,
604 Vectorizable, Tiling>;
606 Executor::runAsync(Assign(dst, expr),
d, on_done);
610 T sum = lhs.
coeff(
i) + rhs.coeff(
i);
// VECTORIZABLE(VAL): yields the `Vectorizable` template argument used by the
// subtest macros. When EIGEN_DONT_VECTORIZE is defined, the value is
// `!EIGEN_DONT_VECTORIZE && VAL`; the other definition passes VAL through
// unchanged (presumably the fallback branch -- confirm against the full file).
// NOTE(review): the first expansion is not parenthesized and relies on
// EIGEN_DONT_VECTORIZE expanding to an expression; if it is defined with an
// empty value the expansion becomes `! && VAL` and will not compile -- verify
// how the macro is defined by the build.
615 #ifdef EIGEN_DONT_VECTORIZE
616 #define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
618 #define VECTORIZABLE(VAL) VAL
// CALL_SUBTEST_PART(PART): helper used by the combination macros below to
// register one subtest call under part PART (only its first line is
// reproduced here).
//
// CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS): instantiates the test
// template NAME<T, NUM_DIMS, Device, Vectorizable, Tiling, Layout> for all 16
// combinations of
//   Device       : DefaultDevice (called with `default_device`),
//                  ThreadPoolDevice (called with `tp_device`)
//   Vectorizable : false, VECTORIZABLE(true)
//   Tiling       : TiledEvaluation::Off, TiledEvaluation::On
//   Layout       : ColMajor, RowMajor
// and wraps each call in CALL_SUBTEST_PART(PART). `default_device` and
// `tp_device` must be in scope at the expansion site. The last call carries no
// trailing semicolon, so the macro is used as a single statement followed by
// `;`.
621 #define CALL_SUBTEST_PART(PART) \
624 #define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
625 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
626 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
627 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
628 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
629 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
630 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
631 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
632 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
633 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
634 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
635 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
636 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
637 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
638 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
639 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
640 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
// CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS): ThreadPoolDevice-
// only counterpart of CALL_SUBTEST_COMBINATIONS. Instantiates
// NAME<T, NUM_DIMS, ThreadPoolDevice, Vectorizable, Tiling, Layout> for the 8
// combinations of
//   Vectorizable : false, VECTORIZABLE(true)
//   Tiling       : TiledEvaluation::Off, TiledEvaluation::On
//   Layout       : ColMajor, RowMajor
// passing `tp_device` (which must be in scope at the expansion site) and
// wrapping each call in CALL_SUBTEST_PART(PART). No DefaultDevice variants are
// generated. The last call carries no trailing semicolon; the caller adds it.
643 #define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
644 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
645 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
646 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
647 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
648 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
649 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
650 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
651 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
658 const auto num_threads = internal::random<int>(20, 24);
660 Eigen::ThreadPoolDevice tp_device(&tp, num_threads);