#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1

#include "main.h"

#include <unsupported/Eigen/CXX11/Tensor>

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_sum_sycl(
    const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 753;
  const IndexType num_cols = 537;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
  array<IndexType, 2> outRange = {{1, 1}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> full_redux(outRange);
  Tensor<DataType, 2, DataLayout, IndexType> full_redux_gpu(outRange);

  in.setRandom();
  auto dim = DSizes<IndexType, 2>(1, 1);
  full_redux = in.sum().reshape(dim);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      full_redux_gpu.dimensions().TotalSize() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(gpu_out_data,
                                                                outRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum().reshape(dim);
  sycl_device.memcpyDeviceToHost(
      full_redux_gpu.data(), gpu_out_data,
      full_redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  std::cout << "SYCL FULL :" << full_redux_gpu(0, 0)
            << ", CPU FULL: " << full_redux(0, 0) << "\n";
  VERIFY_IS_APPROX(full_redux_gpu(0, 0), full_redux(0, 0));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
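
// Every test below follows the same host/device pattern: compute a reference
// reduction on the host, stage the input through SyclDevice::allocate and
// memcpyHostToDevice, evaluate the same expression through a device-bound
// TensorMap, copy the result back with memcpyDeviceToHost, and compare.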

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_sum_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;
  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;

  // Skip the first `offset` elements: reduce over a shifted view of the data.
  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.sum();
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
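
// The *_with_offset variants bind the device TensorMap at `data() + offset`,
// so the reduction runs over a shifted window of the allocation. This checks
// that the SYCL reduction kernels cope with inputs that do not start at the
// beginning of their buffer (and hence may be misaligned for vector loads).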

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_max_sycl(
    const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 4096;
  const IndexType num_cols = 4096;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;

  in.setRandom();
  full_redux = in.maximum();

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 0, DataLayout, IndexType>> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_max_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;
  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Plant the maximum value outside of the reduced (offset) range; since it
  // is excluded from the reduction, the result must not be 2.
  in(0) = static_cast<DataType>(2);

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.maximum();
  VERIFY_IS_NOT_EQUAL(full_redux(), in(0));
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_mean_sycl(
    const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 4096;
  const IndexType num_cols = 4096;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
  array<IndexType, 1> argRange = {{num_cols}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> in_arg1(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> in_arg2(tensorRange);
  Tensor<bool, 1, DataLayout, IndexType> out_arg_cpu(argRange);
  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;

  in.setRandom();
  in_arg1.setRandom();
  in_arg2.setRandom();

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_in_arg1_data = static_cast<DataType*>(sycl_device.allocate(
      in_arg1.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_in_arg2_data = static_cast<DataType*>(sycl_device.allocate(
      in_arg2.dimensions().TotalSize() * sizeof(DataType)));
  bool* gpu_out_arg__gpu_helper_data =
      static_cast<bool*>(sycl_device.allocate(num_cols * sizeof(bool)));
  bool* gpu_out_arg_data =
      static_cast<bool*>(sycl_device.allocate(num_cols * sizeof(bool)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_Arg1_gpu(
      gpu_in_arg1_data, tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_Arg2_gpu(
      gpu_in_arg2_data, tensorRange);
  TensorMap<Tensor<bool, 1, DataLayout, IndexType>> out_Argout_gpu(
      gpu_out_arg_data, argRange);
  TensorMap<Tensor<bool, 1, DataLayout, IndexType>> out_Argout_gpu_helper(
      gpu_out_arg__gpu_helper_data, argRange);
  TensorMap<Tensor<DataType, 0, DataLayout, IndexType>> out_gpu(gpu_out_data);
  // CPU reference: mark the rows where both inputs agree on their argmax,
  // then take the mean of the resulting 0/1 mask.
  out_arg_cpu =
      (in_arg1.argmax(1) == in_arg2.argmax(1))
          .select(out_arg_cpu.constant(true), out_arg_cpu.constant(false));
  full_redux = (out_arg_cpu.template cast<float>()).mean();
  // GPU version of the same computation.
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.memcpyHostToDevice(
      gpu_in_arg1_data, in_arg1.data(),
      in_arg1.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.memcpyHostToDevice(
      gpu_in_arg2_data, in_arg2.data(),
      in_arg2.dimensions().TotalSize() * sizeof(DataType));
  out_Argout_gpu_helper.device(sycl_device) =
      (in_Arg1_gpu.argmax(1) == in_Arg2_gpu.argmax(1));
  out_Argout_gpu.device(sycl_device) =
      out_Argout_gpu_helper.select(out_Argout_gpu.constant(true),
                                   out_Argout_gpu.constant(false));
  out_gpu.device(sycl_device) =
      (out_Argout_gpu.template cast<float>()).mean();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  std::cout << "SYCL : " << full_redux_gpu() << " , CPU : " << full_redux()
            << "\n";
  VERIFY_IS_EQUAL(full_redux_gpu(), full_redux());
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_in_arg1_data);
  sycl_device.deallocate(gpu_in_arg2_data);
  sycl_device.deallocate(gpu_out_arg__gpu_helper_data);
  sycl_device.deallocate(gpu_out_arg_data);
  sycl_device.deallocate(gpu_out_data);
}
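
// Note: the test above reduces a boolean mask (whether two random tensors
// agree on their per-row argmax) cast to float, so the mean is the fraction
// of agreeing rows; host and device must produce it exactly
// (VERIFY_IS_EQUAL rather than VERIFY_IS_APPROX).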

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_mean_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;
  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.mean();
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_mean_with_odd_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 1, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  // An element count that is not a round multiple of the workgroup size, so
  // the kernel is launched with an awkward (non power-of-two) thread count.
  const IndexType n_elems = 8707;
  array<IndexType, 1> tensor_range = {{n_elems}};

  data_tensor in(tensor_range);
  DataType full_redux;
  DataType full_redux_gpu;
  TensorMap<scalar_tensor> red_cpu(&full_redux);
  TensorMap<scalar_tensor> red_gpu(&full_redux_gpu);

  // With a constant input the expected mean is the constant itself.
  const DataType const_val = static_cast<DataType>(0.6391);
  in = in.constant(const_val);

  Eigen::IndexList<Eigen::type2index<0>> red_axis;
  red_cpu = in.mean(red_axis);
  VERIFY_IS_APPROX(const_val, red_cpu());

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data, tensor_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean(red_axis);
  sycl_device.memcpyDeviceToHost(red_gpu.data(), gpu_out_data,
                                 sizeof(DataType));
  VERIFY_IS_APPROX(full_redux_gpu, full_redux);

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
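
// 8707 elements deliberately does not divide evenly into workgroups, so the
// reduction kernel gets an uneven thread decomposition; with a constant
// input, any dropped or double-counted element shows up immediately in the
// mean.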

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_min_sycl(
    const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 876;
  const IndexType num_cols = 953;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;

  in.setRandom();
  full_redux = in.minimum();

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 0, DataLayout, IndexType>> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.minimum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_full_reductions_min_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;
  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Plant the minimum value outside of the reduced (offset) range; since it
  // is excluded from the reduction, the result must not be -2.
  in(0) = static_cast<DataType>(-2);

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.minimum();
  VERIFY_IS_NOT_EQUAL(full_redux(), in(0));
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.minimum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_first_dim_reductions_max_sycl(
    const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 145;
  IndexType dim_y = 1;
  IndexType dim_z = 67;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;
  array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();
  redux = in.maximum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
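
// Unlike the full reductions above, reducing along axis 0 of a
// (dim_x, dim_y, dim_z) tensor leaves a (dim_y, dim_z) result, so the check
// compares every coefficient of the reduced tensor instead of one scalar.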

template <typename DataType, int DataLayout, typename IndexType>
static void test_first_dim_reductions_max_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using reduced_tensor = Tensor<DataType, 1, DataLayout, IndexType>;

  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  array<IndexType, 1> reduced_range = {{num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);
  const IndexType n_reduced = num_cols;

  data_tensor in(tensor_range);
  reduced_tensor redux;
  reduced_tensor redux_gpu(reduced_range);
  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Plant the maximum value outside of the reduced (offset) range so the
  // reduction must not pick it up.
  for (IndexType i = 0; i < n_reduced; i++) {
    in(i) = static_cast<DataType>(2);
  }
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  redux = in_offset.maximum(red_axis);
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_NOT_EQUAL(redux(i), in(i));
  }
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(
      sycl_device.allocate(n_reduced * sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<reduced_tensor> out_gpu(gpu_out_data, reduced_range);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data,
                                 n_reduced * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_APPROX(redux_gpu(i), redux(i));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_last_dim_reductions_max_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using reduced_tensor = Tensor<DataType, 1, DataLayout, IndexType>;

  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  array<IndexType, 1> full_reduced_range = {{num_rows}};
  array<IndexType, 1> reduced_range = {{num_rows - 1}};
  const IndexType n_elems = internal::array_prod(tensor_range);
  const IndexType n_reduced = reduced_range[0];

  data_tensor in(tensor_range);
  reduced_tensor redux(full_reduced_range);
  reduced_tensor redux_gpu(reduced_range);
  in.setRandom();
  redux.setZero();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Plant the maximum value outside of the reduced (offset) range.
  for (IndexType i = 0; i < n_reduced; i++) {
    in(i) = static_cast<DataType>(2);
  }
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 1;

  const IndexType offset = 64;
  // Introduce an offset in both the input and the output.
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  TensorMap<reduced_tensor> red_offset(redux.data() + 1, reduced_range);
  red_offset = in_offset.maximum(red_axis);

  // The first output value must stay untouched, and no reduced value may
  // equal the maximum planted outside the reduced range.
  VERIFY_IS_EQUAL(redux(0), static_cast<DataType>(0));
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_NOT_EQUAL(red_offset(i), in(i));
  }
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(
      sycl_device.allocate((n_reduced + 1) * sizeof(DataType)));
  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<reduced_tensor> out_gpu(gpu_out_data + 1, reduced_range);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
  sycl_device.memcpyDeviceToHost(redux_gpu.data(), out_gpu.data(),
                                 n_reduced * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_APPROX(redux_gpu(i), red_offset(i));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
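
// This variant offsets the *output* as well: the device result is written
// through a TensorMap at gpu_out_data + 1 and copied back from out_gpu.data(),
// so the first element of the output buffer must remain untouched, mirroring
// the host-side check that redux(0) is still zero.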

template <typename DataType, int DataLayout, typename IndexType>
static void test_first_dim_reductions_sum_sycl(
    const Eigen::SyclDevice& sycl_device, IndexType dim_x, IndexType dim_y) {
  array<IndexType, 2> tensorRange = {{dim_x, dim_y}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;
  array<IndexType, 1> reduced_tensorRange = {{dim_y}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 1, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 1, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();
  redux = in.sum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType i = 0; i < redux.size(); i++) {
    VERIFY_IS_APPROX(redux_gpu.data()[i], redux.data()[i]);
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_first_dim_reductions_mean_sycl(
    const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 145;
  IndexType dim_y = 1;
  IndexType dim_z = 67;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;
  array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();
  redux = in.mean(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_last_dim_reductions_mean_sycl(
    const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 64;
  IndexType dim_y = 1;
  IndexType dim_z = 32;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 2;
  array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();
  redux = in.mean(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_last_dim_reductions_sum_sycl(
    const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 64;
  IndexType dim_y = 1;
  IndexType dim_z = 32;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 2;
  array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();
  redux = in.sum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_last_reductions_sum_sycl(
    const Eigen::SyclDevice& sycl_device) {
  auto tensorRange = Sizes<64, 64>(64, 64);
  Eigen::IndexList<Eigen::type2index<1>> red_axis;
  auto reduced_tensorRange = Sizes<64>(64);
  TensorFixedSize<DataType, Sizes<64, 64>, DataLayout> in_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_gpu_fix;

  in_fix.setRandom();
  redux_fix = in_fix.sum(red_axis);
  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(
      in_fix.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType)));
  TensorMap<TensorFixedSize<DataType, Sizes<64, 64>, DataLayout>> in_gpu_fix(
      gpu_in_data, tensorRange);
  TensorMap<TensorFixedSize<DataType, Sizes<64>, DataLayout>> out_gpu_fix(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in_fix.data(),
      in_fix.dimensions().TotalSize() * sizeof(DataType));
  out_gpu_fix.device(sycl_device) = in_gpu_fix.sum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu_fix.data(), gpu_out_data,
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++) {
    VERIFY_IS_APPROX(redux_gpu_fix(j), redux_fix(j));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
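
// The test above uses TensorFixedSize, whose extents (Sizes<64, 64>) are part
// of the type, so the reduced dimensions are compile-time constants; apart
// from that the staging and comparison flow matches the dynamic-size tests.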

template <typename DataType, int DataLayout, typename IndexType>
static void test_last_reductions_mean_sycl(
    const Eigen::SyclDevice& sycl_device) {
  auto tensorRange = Sizes<64, 64>(64, 64);
  Eigen::IndexList<Eigen::type2index<1>> red_axis;
  auto reduced_tensorRange = Sizes<64>(64);
  TensorFixedSize<DataType, Sizes<64, 64>, DataLayout> in_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_gpu_fix;

  in_fix.setRandom();
  redux_fix = in_fix.mean(red_axis);
  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(
      in_fix.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType)));
  TensorMap<TensorFixedSize<DataType, Sizes<64, 64>, DataLayout>> in_gpu_fix(
      gpu_in_data, tensorRange);
  TensorMap<TensorFixedSize<DataType, Sizes<64>, DataLayout>> out_gpu_fix(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in_fix.data(),
      in_fix.dimensions().TotalSize() * sizeof(DataType));
  out_gpu_fix.device(sycl_device) = in_gpu_fix.mean(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu_fix.data(), gpu_out_data,
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.synchronize();
  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++) {
    VERIFY_IS_APPROX(redux_gpu_fix(j), redux_fix(j));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

// A reducer whose accumulator type (OutT) differs from the input type (InT):
// it ORs bits into a mask recording whether any Inf or NaN value was seen.
// SYCL reductions support this generic mixed-type case.
template <typename InT, typename OutT>
struct CustomReducer {
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  static constexpr OutT InfBit = 1;
  static constexpr OutT NanBit = 2;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const InT x,
                                                    OutT* accum) const {
    if (Eigen::numext::isinf(x))
      *accum |= InfBit;
    else if (Eigen::numext::isnan(x))
      *accum |= NanBit;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const OutT x,
                                                    OutT* accum) const {
    *accum |= x;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE OutT initialize() const {
    return OutT(0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE OutT finalize(const OutT accum) const {
    return accum;
  }
};
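
// A reducer usable with Tensor::reduce() must provide both reduce() overloads
// (one to fold an input value into the accumulator, one to merge partial
// accumulators), plus initialize() and finalize(). With PacketAccess set to
// false the evaluator stays on the scalar code path, so no packet overloads
// are needed.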

template <typename DataType, typename AccumType, int DataLayout,
          typename IndexType>
static void test_full_reductions_custom_sycl(
    const Eigen::SyclDevice& sycl_device) {
  constexpr IndexType InSize = 64;
  auto tensorRange = Sizes<InSize>(InSize);
  Eigen::IndexList<Eigen::type2index<0>> dims;
  auto reduced_tensorRange = Sizes<>();
  TensorFixedSize<DataType, Sizes<InSize>, DataLayout> in_fix;
  TensorFixedSize<AccumType, Sizes<>, DataLayout> redux_gpu_fix;

  CustomReducer<DataType, AccumType> reducer;
  in_fix.setRandom();

  size_t in_size_bytes = in_fix.dimensions().TotalSize() * sizeof(DataType);
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(in_size_bytes));
  AccumType* gpu_out_data =
      static_cast<AccumType*>(sycl_device.allocate(sizeof(AccumType)));
  TensorMap<TensorFixedSize<DataType, Sizes<InSize>, DataLayout>> in_gpu_fix(
      gpu_in_data, tensorRange);
  TensorMap<TensorFixedSize<AccumType, Sizes<>, DataLayout>> out_gpu_fix(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(gpu_in_data, in_fix.data(), in_size_bytes);
  out_gpu_fix.device(sycl_device) = in_gpu_fix.reduce(dims, reducer);
  sycl_device.memcpyDeviceToHost(redux_gpu_fix.data(), gpu_out_data,
                                 sizeof(AccumType));
  // A finite random input sets neither the Inf bit nor the NaN bit.
  VERIFY_IS_EQUAL(redux_gpu_fix(), AccumType(0));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}
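
// With a finite random input the Inf/NaN bitmask is never set, so the reduced
// value must compare exactly equal to AccumType(0); the point of the test is
// that the accumulator type (int) differs from the input type (float).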

template <typename DataType, typename Dev>
void sycl_reduction_test_full_per_device(const Dev& sycl_device) {
  test_full_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_full_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_full_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device);

  test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_full_reductions_custom_sycl<DataType, int, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_custom_sycl<DataType, int, ColMajor, int64_t>(
      sycl_device);
  sycl_device.synchronize();
}

template <typename DataType, typename Dev>
void sycl_reduction_full_offset_per_device(const Dev& sycl_device) {
  test_full_reductions_sum_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_sum_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_min_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_min_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_max_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_max_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_mean_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_mean_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_mean_with_odd_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  sycl_device.synchronize();
}

template <typename DataType, typename Dev>
void sycl_reduction_test_first_dim_per_device(const Dev& sycl_device) {
  test_first_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device,
                                                                  4197, 4097);
  test_first_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device,
                                                                  4197, 4097);
  test_first_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device,
                                                                  129, 8);
  test_first_dim_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_first_dim_reductions_max_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  sycl_device.synchronize();
}

template <typename DataType, typename Dev>
void sycl_reduction_test_last_dim_per_device(const Dev& sycl_device) {
  test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_last_dim_reductions_max_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_last_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_last_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_last_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_last_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device);
  sycl_device.synchronize();
}
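
// Each per-device dispatcher ends with synchronize() so that any asynchronous
// failure surfaces inside the subtest that caused it rather than leaking into
// the next one.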

EIGEN_DECLARE_TEST(cxx11_tensor_reduction_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    std::cout << "Running on "
              << device.template get_info<cl::sycl::info::device::name>()
              << std::endl;
    QueueInterface queueInterface(device);
    auto sycl_device = Eigen::SyclDevice(&queueInterface);
    CALL_SUBTEST_1(sycl_reduction_test_full_per_device<float>(sycl_device));
    CALL_SUBTEST_2(sycl_reduction_full_offset_per_device<float>(sycl_device));
    CALL_SUBTEST_3(
        sycl_reduction_test_first_dim_per_device<float>(sycl_device));
    CALL_SUBTEST_4(sycl_reduction_test_last_dim_per_device<float>(sycl_device));
  }
}