cxx11_tensor_executor.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #define EIGEN_USE_THREADS
11 
12 #include "main.h"
13 
14 #include <Eigen/CXX11/Tensor>
15 
16 using Eigen::Tensor;
17 using Eigen::RowMajor;
18 using Eigen::ColMajor;
20 
21 // A set of tests to verify that different TensorExecutor strategies yield the
22 // same results for all the ops that support tiled evaluation.
23 
24 // Default assignment that does not use block evaluation or vectorization.
25 // We assume that default coefficient evaluation is well tested and correct.
//
// Evaluates `expr` into `dst` through a TensorExecutor on a DefaultDevice with
// vectorization disabled and tiling off. The shuffle tests use it to produce
// their reference ("golden") tensors.
26 template <typename Dst, typename Expr>
27 static void DefaultAssign(Dst& dst, Expr expr) {
28  using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
29  using Executor =
30  Eigen::internal::TensorExecutor<const Assign, DefaultDevice,
31  /*Vectorizable=*/false,
32  /*Tiling=*/TiledEvaluation::Off>;
33 
34  Executor::run(Assign(dst, expr), DefaultDevice());
35 }
36 
37 // Assignment with specified device and tiling strategy.
//
// Evaluates `expr` into `dst` on device `d` through a TensorExecutor that is
// instantiated with the caller-chosen `Vectorizable` and `Tiling` arguments.
38 template <bool Vectorizable, TiledEvaluation Tiling, typename Device,
39  typename Dst, typename Expr>
40 static void DeviceAssign(Device& d, Dst& dst, Expr expr) {
41  using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
42  using Executor = Eigen::internal::TensorExecutor<const Assign, Device,
43  Vectorizable, Tiling>;
44 
45  Executor::run(Assign(dst, expr), d);
46 }
47 
// Returns NumDims dimension sizes, each one drawn independently with
// internal::random<int>(min_dim, max_dim).
48 template <int NumDims>
49 static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
50  array<Index, NumDims> dims;
51  for (int i = 0; i < NumDims; ++i) {
52  dims[i] = internal::random<int>(min_dim, max_dim);
53  }
54  return dims;
55 }
56 
// Runs `dst = src.square()` through the configured TensorExecutor on device `d`
// and checks every coefficient against src.coeff(i) * src.coeff(i).
57 template <typename T, int NumDims, typename Device, bool Vectorizable,
58  TiledEvaluation Tiling, int Layout>
59 static void test_execute_unary_expr(Device d)
60 {
61  static constexpr int Options = 0 | Layout;
62 
63  // Pick a large enough tensor size to bypass small tensor block evaluation
64  // optimization.
65  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
66 
67  Tensor<T, NumDims, Options, Index> src(dims);
68  Tensor<T, NumDims, Options, Index> dst(dims);
69 
70  src.setRandom();
71  const auto expr = src.square();
72 
73  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
74  using Executor =
75  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
76 
77  Executor::run(Assign(dst, expr), d);
78 
79  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
80  T square = src.coeff(i) * src.coeff(i);
81  VERIFY_IS_EQUAL(square, dst.coeff(i));
82  }
83 }
84 
// Runs `dst = lhs + rhs` through the configured TensorExecutor on device `d`
// and checks every coefficient against lhs.coeff(i) + rhs.coeff(i).
85 template <typename T, int NumDims, typename Device, bool Vectorizable,
86  TiledEvaluation Tiling, int Layout>
87 static void test_execute_binary_expr(Device d)
88 {
89  static constexpr int Options = 0 | Layout;
90 
91  // Pick a large enough tensor size to bypass small tensor block evaluation
92  // optimization.
93  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
94 
95  Tensor<T, NumDims, Options, Index> lhs(dims);
96  Tensor<T, NumDims, Options, Index> rhs(dims);
97  Tensor<T, NumDims, Options, Index> dst(dims);
98 
99  lhs.setRandom();
100  rhs.setRandom();
101 
102  const auto expr = lhs + rhs;
103 
104  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
105  using Executor =
106  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
107 
108  Executor::run(Assign(dst, expr), d);
109 
110  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
111  T sum = lhs.coeff(i) + rhs.coeff(i);
112  VERIFY_IS_EQUAL(sum, dst.coeff(i));
113  }
114 }
115 
// Broadcasts a random tensor by random per-dimension factors (1..7) and checks
// that the configured TensorExecutor produces the same coefficients as a plain
// `golden = expr` assignment.
116 template <typename T, int NumDims, typename Device, bool Vectorizable,
117  TiledEvaluation Tiling, int Layout>
118 static void test_execute_broadcasting(Device d)
119 {
120  static constexpr int Options = 0 | Layout;
121 
122  auto dims = RandomDims<NumDims>(1, 10);
123  Tensor<T, NumDims, Options, Index> src(dims);
124  src.setRandom();
125 
126  const auto broadcasts = RandomDims<NumDims>(1, 7);
127  const auto expr = src.broadcast(broadcasts);
128 
129  // We assume that broadcasting on a default device is tested and correct, so
130  // we can rely on it to verify correctness of tensor executor and tiling.
131  Tensor<T, NumDims, Options, Index> golden;
132  golden = expr;
133 
134  // Now do the broadcasting using configured tensor executor.
135  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
136 
137  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
138  using Executor =
139  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
140 
141  Executor::run(Assign(dst, expr), d);
142 
143  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
144  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
145  }
146 }
147 
// Reads a chip (`src.chip<CHIP_DIM>(offset)`) of a random tensor through the
// configured TensorExecutor and compares it with a plain `golden = expr`
// assignment. TEST_CHIPPING is expanded for dimensions 0..5; each expansion is
// guarded by `NumDims > CHIP_DIM`, so only valid dimensions are exercised.
148 template <typename T, int NumDims, typename Device, bool Vectorizable,
149  TiledEvaluation Tiling, int Layout>
150 static void test_execute_chipping_rvalue(Device d)
151 {
152  auto dims = RandomDims<NumDims>(1, 10);
153  Tensor<T, NumDims, Layout, Index> src(dims);
154  src.setRandom();
155 
156 #define TEST_CHIPPING(CHIP_DIM) \
157  if (NumDims > (CHIP_DIM)) { \
158  const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
159  const auto expr = src.template chip<(CHIP_DIM)>(offset); \
160  \
161  Tensor<T, NumDims - 1, Layout, Index> golden; \
162  golden = expr; \
163  \
164  Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions()); \
165  \
166  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; \
167  using Executor = internal::TensorExecutor<const Assign, Device, \
168  Vectorizable, Tiling>; \
169  \
170  Executor::run(Assign(dst, expr), d); \
171  \
172  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
173  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
174  } \
175  }
176 
177  TEST_CHIPPING(0)
178  TEST_CHIPPING(1)
179  TEST_CHIPPING(2)
180  TEST_CHIPPING(3)
181  TEST_CHIPPING(4)
182  TEST_CHIPPING(5)
183 
184 #undef TEST_CHIPPING
185 }
186 
// Writes a random (NumDims - 1)-dimensional tensor into a chip of `dst`
// (`dst.chip<CHIP_DIM>(offset) = src`) through the configured TensorExecutor
// and compares the whole of `dst` with `golden`, which receives the same chip
// via a plain assignment. TEST_CHIPPING is expanded for dimensions 0..5; each
// expansion is guarded by `NumDims > CHIP_DIM`.
187 template <typename T, int NumDims, typename Device, bool Vectorizable,
188  TiledEvaluation Tiling, int Layout>
189 static void test_execute_chipping_lvalue(Device d)
190 {
191  auto dims = RandomDims<NumDims>(1, 10);
192 
// Per-dimension test body. `src_dims` is `dims` with entry CHIP_DIM removed.
// NOTE(review): the tensor named `random` is filled with setZero(), so both
// `golden` and `dst` start as all zeros outside the chip — confirm that this
// is intended.
193 #define TEST_CHIPPING(CHIP_DIM) \
194  if (NumDims > (CHIP_DIM)) { \
195  /* Generate random data that we'll assign to the chipped tensor dim. */ \
196  array<Index, NumDims - 1> src_dims; \
197  for (int i = 0; i < NumDims - 1; ++i) { \
198  int dim = i < (CHIP_DIM) ? i : i + 1; \
199  src_dims[i] = dims[dim]; \
200  } \
201  \
202  Tensor<T, NumDims - 1, Layout, Index> src(src_dims); \
203  src.setRandom(); \
204  \
205  const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
206  \
207  Tensor<T, NumDims, Layout, Index> random(dims); \
208  random.setZero(); \
209  \
210  Tensor<T, NumDims, Layout, Index> golden(dims); \
211  golden = random; \
212  golden.template chip<(CHIP_DIM)>(offset) = src; \
213  \
214  Tensor<T, NumDims, Layout, Index> dst(dims); \
215  dst = random; \
216  auto expr = dst.template chip<(CHIP_DIM)>(offset); \
217  \
218  using Assign = TensorAssignOp<decltype(expr), const decltype(src)>; \
219  using Executor = internal::TensorExecutor<const Assign, Device, \
220  Vectorizable, Tiling>; \
221  \
222  Executor::run(Assign(expr, src), d); \
223  \
224  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
225  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
226  } \
227  }
228 
229  TEST_CHIPPING(0)
230  TEST_CHIPPING(1)
231  TEST_CHIPPING(2)
232  TEST_CHIPPING(3)
233  TEST_CHIPPING(4)
234  TEST_CHIPPING(5)
235 
236 #undef TEST_CHIPPING
237 }
238 
// For every permutation of the NumDims dimensions, evaluates
// `src.shuffle(shuffle)` with DeviceAssign<Vectorizable, Tiling> on device `d`
// and compares the result with the DefaultAssign reference.
239 template <typename T, int NumDims, typename Device, bool Vectorizable,
240  TiledEvaluation Tiling, int Layout>
241 static void test_execute_shuffle_rvalue(Device d)
242 {
243  static constexpr int Options = 0 | Layout;
244 
245  auto dims = RandomDims<NumDims>(1, 10);
246  Tensor<T, NumDims, Options, Index> src(dims);
247  src.setRandom();
248 
249  DSizes<Index, NumDims> shuffle;
250  for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
251 
252  // Test all possible shuffle permutations.
253  do {
254  DSizes<Index, NumDims> shuffled_dims;
255  for (int i = 0; i < NumDims; ++i) {
256  shuffled_dims[i] = dims[shuffle[i]];
257  }
258 
259  const auto expr = src.shuffle(shuffle);
260 
261  // We assume that shuffling on a default device is tested and correct, so
262  // we can rely on it to verify correctness of tensor executor and tiling.
263  Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
264  DefaultAssign(golden, expr);
265 
266  // Now do the shuffling using configured tensor executor.
267  Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
268  DeviceAssign<Vectorizable, Tiling>(d, dst, expr);
269 
270  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
271  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
272  }
273 
274  } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
275 }
276 
// For every permutation of the NumDims dimensions, assigns `src` *into* a
// shuffle expression (`dst.shuffle(shuffle)`) with
// DeviceAssign<Vectorizable, Tiling> and compares `dst` with the tensor filled
// the same way by DefaultAssign.
277 template <typename T, int NumDims, typename Device, bool Vectorizable,
278  TiledEvaluation Tiling, int Layout>
279 static void test_execute_shuffle_lvalue(Device d)
280 {
281  static constexpr int Options = 0 | Layout;
282 
283  auto dims = RandomDims<NumDims>(5, 10);
284  Tensor<T, NumDims, Options, Index> src(dims);
285  src.setRandom();
286 
287  DSizes<Index, NumDims> shuffle;
288  for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
289 
290  // Test all possible shuffle permutations.
291  do {
// The destination is sized so that its shuffled view has the shape of src.
292  DSizes<Index, NumDims> shuffled_dims;
293  for (int i = 0; i < NumDims; ++i) shuffled_dims[shuffle[i]] = dims[i];
294 
295  // We assume that shuffling on a default device is tested and correct, so
296  // we can rely on it to verify correctness of tensor executor and tiling.
297  Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
298  auto golden_shuffle = golden.shuffle(shuffle);
299  DefaultAssign(golden_shuffle, src);
300 
301  // Now do the shuffling using configured tensor executor.
302  Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
303  auto dst_shuffle = dst.shuffle(shuffle);
304  DeviceAssign<Vectorizable, Tiling>(d, dst_shuffle, src);
305 
306  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
307  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
308  }
309 
310  } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
311 }
312 
// Reshapes a NumDims tensor to NumDims - 1 dimensions (dims 0 and 1 merged,
// the resulting sizes placed in a shuffled order) through the configured
// TensorExecutor and compares with a directly constructed reference tensor.
313 template <typename T, int NumDims, typename Device, bool Vectorizable,
314  TiledEvaluation Tiling, int Layout>
315 static void test_execute_reshape(Device d)
316 {
317  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
318 
319  static constexpr int ReshapedDims = NumDims - 1;
320  static constexpr int Options = 0 | Layout;
321 
322  auto dims = RandomDims<NumDims>(5, 10);
323  Tensor<T, NumDims, Options, Index> src(dims);
324  src.setRandom();
325 
326  // Multiply the 0th dimension by the 1st, and then shuffle the sizes.
// NOTE(review): std::mt19937() is default-constructed on every call, so the
// permutation is presumably the same on each run — confirm this is intended.
327  std::vector<Index> shuffle;
328  for (int i = 0; i < ReshapedDims; ++i) shuffle.push_back(i);
329  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
330 
331  DSizes<Index, ReshapedDims> reshaped_dims;
332  reshaped_dims[shuffle[0]] = dims[0] * dims[1];
333  for (int i = 1; i < ReshapedDims; ++i) reshaped_dims[shuffle[i]] = dims[i + 1];
334 
335  Tensor<T, ReshapedDims, Options, Index> golden = src.reshape(reshaped_dims);
336 
337  // Now reshape using configured tensor executor.
338  Tensor<T, ReshapedDims, Options, Index> dst(golden.dimensions());
339 
340  auto expr = src.reshape(reshaped_dims);
341 
342  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
343  using Executor =
344  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
345 
346  Executor::run(Assign(dst, expr), d);
347 
348  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
349  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
350  }
351 }
352 
// Reads a random slice of a random tensor through the configured
// TensorExecutor and compares it with a directly constructed reference tensor.
353 template <typename T, int NumDims, typename Device, bool Vectorizable,
354  TiledEvaluation Tiling, int Layout>
355 static void test_execute_slice_rvalue(Device d)
356 {
357  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
358  static constexpr int Options = 0 | Layout;
359 
360  auto dims = RandomDims<NumDims>(5, 10);
361  Tensor<T, NumDims, Options, Index> src(dims);
362  src.setRandom();
363 
364  // Pick a random slice of src tensor.
365  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>());
366  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>());
367 
368  // Make sure that slice start + size do not overflow tensor dims.
369  for (int i = 0; i < NumDims; ++i) {
370  slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
371  slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
372  }
373 
374  Tensor<T, NumDims, Options, Index> golden =
375  src.slice(slice_start, slice_size);
376 
377  // Now slice using configured tensor executor.
378  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
379 
380  auto expr = src.slice(slice_start, slice_size);
381 
382  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
383  using Executor =
384  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
385 
386  Executor::run(Assign(dst, expr), d);
387 
388  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
389  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
390  }
391 }
392 
// Writes a random tensor into a random slice of `dst` through the configured
// TensorExecutor and compares the whole of `dst` with `golden`, which receives
// the same slice via a plain assignment. Both start as copies of `src`.
393 template <typename T, int NumDims, typename Device, bool Vectorizable,
394  TiledEvaluation Tiling, int Layout>
395 static void test_execute_slice_lvalue(Device d)
396 {
397  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
398  static constexpr int Options = 0 | Layout;
399 
400  auto dims = RandomDims<NumDims>(5, 10);
401  Tensor<T, NumDims, Options, Index> src(dims);
402  src.setRandom();
403 
404  // Pick a random slice of src tensor.
405  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
406  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
407 
408  // Make sure that slice start + size do not overflow tensor dims.
409  for (int i = 0; i < NumDims; ++i) {
410  slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
411  slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
412  }
413 
414  Tensor<T, NumDims, Options, Index> slice(slice_size);
415  slice.setRandom();
416 
417  // Assign a slice using default executor.
418  Tensor<T, NumDims, Options, Index> golden = src;
419  golden.slice(slice_start, slice_size) = slice;
420 
421  // And using configured execution strategy.
422  Tensor<T, NumDims, Options, Index> dst = src;
423  auto expr = dst.slice(slice_start, slice_size);
424 
425  using Assign = TensorAssignOp<decltype(expr), const decltype(slice)>;
426  using Executor =
427  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
428 
429  Executor::run(Assign(expr, slice), d);
430 
431  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
432  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
433  }
434 }
435 
// Same check as the plain broadcasting test, but the broadcast input is
// `src.square().eval()`, i.e. a forced-evaluation sub-expression.
436 template <typename T, int NumDims, typename Device, bool Vectorizable,
437  TiledEvaluation Tiling, int Layout>
438 static void test_execute_broadcasting_of_forced_eval(Device d)
439 {
440  static constexpr int Options = 0 | Layout;
441 
442  auto dims = RandomDims<NumDims>(1, 10);
443  Tensor<T, NumDims, Options, Index> src(dims);
444  src.setRandom();
445 
446  const auto broadcasts = RandomDims<NumDims>(1, 7);
447  const auto expr = src.square().eval().broadcast(broadcasts);
448 
449  // We assume that broadcasting on a default device is tested and correct, so
450  // we can rely on it to verify correctness of tensor executor and tiling.
451  Tensor<T, NumDims, Options, Index> golden;
452  golden = expr;
453 
454  // Now do the broadcasting using configured tensor executor.
455  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
456 
457  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
458  using Executor =
459  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
460 
461  Executor::run(Assign(dst, expr), d);
462 
463  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
464  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
465  }
466 }
467 
// Generator functor for the generator-op test: maps a coordinate array to the
// weighted sum of its entries, sum over i of (i + 1) * dims[i], cast to T.
468 template<typename T, int NumDims>
469 struct DummyGenerator {
470  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
471  T operator()(const array <Index, NumDims>& dims) const {
472  T result = static_cast<T>(0);
473  for (int i = 0; i < NumDims; ++i) {
474  result += static_cast<T>((i + 1) * dims[i]);
475  }
476  return result;
477  }
478 };
479 
// Evaluates `src.generate(DummyGenerator<T, NumDims>())` through the
// configured TensorExecutor and compares it with a plain `golden = expr`
// assignment.
480 template <typename T, int NumDims, typename Device, bool Vectorizable,
481  TiledEvaluation Tiling, int Layout>
482 static void test_execute_generator_op(Device d)
483 {
484  static constexpr int Options = 0 | Layout;
485 
486  auto dims = RandomDims<NumDims>(20, 30);
487  Tensor<T, NumDims, Options, Index> src(dims);
488  src.setRandom();
489 
490  const auto expr = src.generate(DummyGenerator<T, NumDims>());
491 
492  // We assume that generator on a default device is tested and correct, so
493  // we can rely on it to verify correctness of tensor executor and tiling.
494  Tensor<T, NumDims, Options, Index> golden;
495  golden = expr;
496 
497  // Now evaluate the generator using configured tensor executor.
498  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
499 
500  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
501  using Executor =
502  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
503 
504  Executor::run(Assign(dst, expr), d);
505 
506  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
507  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
508  }
509 }
510 
// Reverses a randomly chosen subset of dimensions of a random tensor through
// the configured TensorExecutor and compares the result with a plain
// `golden = expr` assignment.
511 template <typename T, int NumDims, typename Device, bool Vectorizable,
512  TiledEvaluation Tiling, int Layout>
513 static void test_execute_reverse_rvalue(Device d)
514 {
515  static constexpr int Options = 0 | Layout;
516 
// The upper bound per dimension is the NumDims-th root of 1e6.
517  auto dims = RandomDims<NumDims>(1, numext::pow(1000000.0, 1.0 / NumDims));
518  Tensor<T, NumDims, Options, Index> src(dims);
519  src.setRandom();
520 
521  // Reverse each dimension with a randomly drawn flag.
522  Eigen::array<bool, NumDims> reverse;
523  for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
524 
525  const auto expr = src.reverse(reverse);
526 
527  // We assume that reversing on a default device is tested and correct, so
528  // we can rely on it to verify correctness of tensor executor and tiling.
529  Tensor<T, NumDims, Options, Index> golden;
530  golden = expr;
531 
532  // Now do the reversing using configured tensor executor.
533  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
534 
535  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
536  using Executor =
537  internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
538 
539  Executor::run(Assign(dst, expr), d);
540 
541  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
542  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
543  }
544 }
545 
// Async variant of the unary test: runs `dst = src.square()` with
// TensorAsyncExecutor::runAsync, blocks on a Barrier that the completion
// callback notifies, then checks every coefficient.
546 template <typename T, int NumDims, typename Device, bool Vectorizable,
547  TiledEvaluation Tiling, int Layout>
548 static void test_async_execute_unary_expr(Device d)
549 {
550  static constexpr int Options = 0 | Layout;
551 
552  // Pick a large enough tensor size to bypass small tensor block evaluation
553  // optimization.
554  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
555 
556  Tensor<T, NumDims, Options, Index> src(dims);
557  Tensor<T, NumDims, Options, Index> dst(dims);
558 
559  src.setRandom();
560  const auto expr = src.square();
561 
// `done` is captured by reference; done.Wait() below keeps it alive until the
// callback has run.
562  Eigen::Barrier done(1);
563  auto on_done = [&done]() { done.Notify(); };
564 
565  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
566  using DoneCallback = decltype(on_done);
567  using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
568  Vectorizable, Tiling>;
569 
570  Executor::runAsync(Assign(dst, expr), d, on_done);
571  done.Wait();
572 
573  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
574  T square = src.coeff(i) * src.coeff(i);
575  VERIFY_IS_EQUAL(square, dst.coeff(i));
576  }
577 }
578 
// Async variant of the binary test: runs `dst = lhs + rhs` with
// TensorAsyncExecutor::runAsync, blocks on a Barrier that the completion
// callback notifies, then checks every coefficient.
579 template <typename T, int NumDims, typename Device, bool Vectorizable,
580  TiledEvaluation Tiling, int Layout>
581 static void test_async_execute_binary_expr(Device d)
582 {
583  static constexpr int Options = 0 | Layout;
584 
585  // Pick a large enough tensor size to bypass small tensor block evaluation
586  // optimization.
587  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
588 
589  Tensor<T, NumDims, Options, Index> lhs(dims);
590  Tensor<T, NumDims, Options, Index> rhs(dims);
591  Tensor<T, NumDims, Options, Index> dst(dims);
592 
593  lhs.setRandom();
594  rhs.setRandom();
595 
596  const auto expr = lhs + rhs;
597 
// `done` is captured by reference; done.Wait() below keeps it alive until the
// callback has run.
598  Eigen::Barrier done(1);
599  auto on_done = [&done]() { done.Notify(); };
600 
601  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
602  using DoneCallback = decltype(on_done);
603  using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
604  Vectorizable, Tiling>;
605 
606  Executor::runAsync(Assign(dst, expr), d, on_done);
607  done.Wait();
608 
609  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
610  T sum = lhs.coeff(i) + rhs.coeff(i);
611  VERIFY_IS_EQUAL(sum, dst.coeff(i));
612  }
613 }
614 
// VECTORIZABLE(VAL): value passed as the `Vectorizable` template argument by
// the subtest macros. When EIGEN_DONT_VECTORIZE is defined it expands to
// `!EIGEN_DONT_VECTORIZE && VAL`; otherwise it expands to VAL unchanged.
// NOTE(review): the first form needs EIGEN_DONT_VECTORIZE to expand to an
// expression (e.g. 1); an empty definition would leave `! && VAL` — confirm
// how the build defines it.
615 #ifdef EIGEN_DONT_VECTORIZE
616 #define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
617 #else
618 #define VECTORIZABLE(VAL) VAL
619 #endif
620 
// Token-pastes PART onto CALL_SUBTEST_, e.g. CALL_SUBTEST_PART(3) becomes
// CALL_SUBTEST_3.
621 #define CALL_SUBTEST_PART(PART) \
622  CALL_SUBTEST_##PART
623 
// Instantiates test template NAME<T, NUM_DIMS, ...> in subtest part PART for
// all 16 combinations of device (DefaultDevice / ThreadPoolDevice),
// vectorization (false / VECTORIZABLE(true)), tiling (Off / On) and layout
// (ColMajor / RowMajor). It expects variables named `default_device` and
// `tp_device` to be in scope at the point of expansion.
624 #define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
625  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
626  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
627  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
628  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
629  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
630  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
631  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
632  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
633  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
634  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
635  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
636  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
637  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
638  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
639  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
640  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
641 
// Async counterpart of CALL_SUBTEST_COMBINATIONS: instantiates NAME in subtest
// part PART for the 8 ThreadPoolDevice combinations of vectorization, tiling
// and layout. It expects a variable named `tp_device` to be in scope.
642 // NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
643 #define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
644  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
645  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
646  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
647  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
648  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
649  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
650  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
651  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
652 
// Test entry point. Creates the `default_device` and `tp_device` objects that
// the subtest macros refer to; the thread pool is sized with a thread count
// drawn by internal::random<int>(20, 24).
// NOTE(review): no CALL_SUBTEST_COMBINATIONS / CALL_ASYNC_SUBTEST_COMBINATIONS
// invocations are visible in this listing. The embedded line numbers jump
// (660 -> 665 -> 669 -> ...), so they presumably sit in those gaps — confirm
// against the original file.
653 EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
654  Eigen::DefaultDevice default_device;
655  // Default device is unused in ASYNC tests.
656  EIGEN_UNUSED_VARIABLE(default_device);
657 
658  const auto num_threads = internal::random<int>(20, 24);
659  Eigen::ThreadPool tp(num_threads);
660  Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
661 
665 
669 
673 
677 
681 
685 
689 
694 
699 
704 
709 
714 
720 
724 
728 
729  // Force CMake to split this test.
730  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16
731 }
Eigen::Tensor
The tensor class.
Definition: Tensor.h:63
test_execute_slice_rvalue
static void test_execute_slice_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:355
EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:976
Eigen::internal::TiledEvaluation
TiledEvaluation
Definition: TensorForwardDeclarations.h:158
test_async_execute_unary_expr
static void test_async_execute_unary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:548
d
static const double d[K][N]
Definition: igam.h:11
test_execute_slice_lvalue
static void test_execute_slice_lvalue(Device d)
Definition: cxx11_tensor_executor.cpp:395
Eigen::array< Index, NumDims >
VERIFY_IS_EQUAL
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:386
Eigen::Tensor::coeff
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE Scalar & coeff(const array< Index, NumIndices > &indices) const
Definition: Tensor.h:124
test_async_execute_binary_expr
static void test_async_execute_binary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:581
test_execute_unary_expr
static void test_execute_unary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:59
DefaultAssign
static void DefaultAssign(Dst &dst, Expr expr)
Definition: cxx11_tensor_executor.cpp:27
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::reshape
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE TensorReshapingOp< const NewDimensions, const Tensor< Scalar_, NumIndices_, Options_, IndexType_ > > reshape(const NewDimensions &newDimensions) const
Definition: TensorBase.h:1055
test_execute_broadcasting_of_forced_eval
static void test_execute_broadcasting_of_forced_eval(Device d)
Definition: cxx11_tensor_executor.cpp:438
T
Eigen::Triplet< double > T
Definition: Tutorial_sparse_example.cpp:6
test_execute_generator_op
static void test_execute_generator_op(Device d)
Definition: cxx11_tensor_executor.cpp:482
Eigen::RowMajor
@ RowMajor
Definition: Constants.h:321
Eigen::Barrier::Wait
void Wait()
Definition: Barrier.h:40
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::shuffle
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE TensorShufflingOp< const Shuffle, const Tensor< Scalar_, NumIndices_, Options_, IndexType_ > > shuffle(const Shuffle &shfl) const
Definition: TensorBase.h:1123
test_execute_chipping_rvalue
static void test_execute_chipping_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:150
result
Values result
Definition: OdometryOptimize.cpp:8
test_execute_reshape
static void test_execute_reshape(Device d)
Definition: cxx11_tensor_executor.cpp:315
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::reverse
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE TensorReverseOp< const ReverseDimensions, const Tensor< Scalar_, NumIndices_, Options_, IndexType_ > > reverse(const ReverseDimensions &rev) const
Definition: TensorBase.h:1112
Eigen::DSizes< Index, NumDims >
Eigen::DefaultDevice
Definition: TensorDeviceDefault.h:17
slice
Definition: pytypes.h:1909
EIGEN_DECLARE_TEST
EIGEN_DECLARE_TEST(cxx11_tensor_executor)
Definition: cxx11_tensor_executor.cpp:653
EIGEN_UNUSED_VARIABLE
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:1076
CALL_SUBTEST_COMBINATIONS
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
Definition: cxx11_tensor_executor.cpp:624
test_execute_reverse_rvalue
static void test_execute_reverse_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:513
Eigen::Tensor::dimensions
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE Dimensions & dimensions() const
Definition: Tensor.h:102
square
const EIGEN_DEVICE_FUNC SquareReturnType square() const
Definition: ArrayCwiseUnaryOps.h:425
Eigen::numext::mini
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition: Eigen/src/Core/MathFunctions.h:1085
gtsam.examples.DogLegOptimizerExample.run
def run(args)
Definition: DogLegOptimizerExample.py:21
EIGEN_ALWAYS_INLINE
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:932
Eigen::Triplet< double >
ceres::pow
Jet< T, N > pow(const Jet< T, N > &f, double g)
Definition: jet.h:570
Eigen::ThreadPoolTempl
Definition: NonBlockingThreadPool.h:16
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::setRandom
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
Definition: TensorBase.h:996
TEST_CHIPPING
#define TEST_CHIPPING(CHIP_DIM)
Eigen::internal::TensorExecutor
Definition: TensorExecutor.h:81
RandomDims
static array< Index, NumDims > RandomDims(int min_dim=1, int max_dim=20)
Definition: cxx11_tensor_executor.cpp:49
DeviceAssign
static void DeviceAssign(Device &d, Dst &dst, Expr expr)
Definition: cxx11_tensor_executor.cpp:40
Eigen::internal::Off
@ Off
Definition: TensorForwardDeclarations.h:159
main.h
DummyGenerator::operator()
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T operator()(const array< Index, NumDims > &dims) const
Definition: cxx11_tensor_executor.cpp:471
test_execute_broadcasting
static void test_execute_broadcasting(Device d)
Definition: cxx11_tensor_executor.cpp:118
test_execute_shuffle_rvalue
static void test_execute_shuffle_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:241
test_execute_chipping_lvalue
static void test_execute_chipping_lvalue(Device d)
Definition: cxx11_tensor_executor.cpp:189
test_execute_binary_expr
static void test_execute_binary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:87
test_execute_shuffle_lvalue
static void test_execute_shuffle_lvalue(Device d)
Definition: cxx11_tensor_executor.cpp:279
CALL_ASYNC_SUBTEST_COMBINATIONS
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
Definition: cxx11_tensor_executor.cpp:643
reverse
void reverse(const MatrixType &m)
Definition: array_reverse.cpp:16
Eigen::Barrier
Definition: Barrier.h:18
Eigen::ColMajor
@ ColMajor
Definition: Constants.h:319
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::slice
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE TensorSlicingOp< const StartIndices, const Sizes, const Tensor< Scalar_, NumIndices_, Options_, IndexType_ > > slice(const StartIndices &startIndices, const Sizes &sizes) const
Definition: TensorBase.h:1066
Eigen::TensorAssignOp
Definition: TensorAssign.h:61
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Eigen::Barrier::Notify
void Notify()
Definition: Barrier.h:25
DummyGenerator
Definition: cxx11_tensor_executor.cpp:469
Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74


gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:01:23