#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1

#include "main.h"

#include <unsupported/Eigen/CXX11/Tensor>

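// Full reduction (sum) of a 2D tensor: the reduction is computed on the host
// and on the SYCL device, and the two results are compared.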
template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_sum_sycl(const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 753;
  const IndexType num_cols = 537;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
  array<IndexType, 2> outRange = {{1, 1}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> full_redux(outRange);
  Tensor<DataType, 2, DataLayout, IndexType> full_redux_gpu(outRange);

  in.setRandom();
  auto dim = DSizes<IndexType, 2>(1, 1);
  full_redux = in.sum().reshape(dim);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = (DataType*)sycl_device.allocate(
      full_redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(gpu_out_data,
                                                                outRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum().reshape(dim);
  sycl_device.memcpyDeviceToHost(
      full_redux_gpu.data(), gpu_out_data,
      full_redux_gpu.dimensions().TotalSize() * sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  std::cout << "SYCL FULL :" << full_redux_gpu(0, 0)
            << ", CPU FULL: " << full_redux(0, 0) << "\n";
  VERIFY_IS_APPROX(full_redux_gpu(0, 0), full_redux(0, 0));
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

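// Variant of the full sum reduction where the reduced TensorMap starts at an
// offset into the allocated buffer and covers one row less, to check that
// reductions honour non-zero data offsets.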
template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_sum_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;

  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.sum();

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

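// Full maximum reduction over a large 2D tensor.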
template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 4096;
  const IndexType num_cols = 4096;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;

  in.setRandom();

  full_redux = in.maximum();

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = (DataType*)sycl_device.allocate(sizeof(DataType));

  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 0, DataLayout, IndexType>> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_max_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;

  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Plant a maximum in the part of the tensor that is excluded by the offset;
  // it must not show up in the reduced result.
  in(0) = static_cast<DataType>(2);

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.maximum();
  VERIFY_IS_NOT_EQUAL(full_redux(), in(0));

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

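// Full mean reduction driven by an argmax comparison: the argmax of two random
// tensors is compared along dimension 1, the resulting boolean mask is cast to
// float, and its mean is computed on both host and device.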
template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 4096;
  const IndexType num_cols = 4096;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
  array<IndexType, 1> argRange = {{num_cols}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> in_arg1(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> in_arg2(tensorRange);
  Tensor<bool, 1, DataLayout, IndexType> out_arg_cpu(argRange);
  Tensor<bool, 1, DataLayout, IndexType> out_arg_gpu(argRange);
  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;

  in.setRandom();
  in_arg1.setRandom();
  in_arg2.setRandom();

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_in_arg1_data = static_cast<DataType*>(sycl_device.allocate(
      in_arg1.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_in_arg2_data = static_cast<DataType*>(sycl_device.allocate(
      in_arg2.dimensions().TotalSize() * sizeof(DataType)));
  bool* gpu_out_arg__gpu_helper_data = static_cast<bool*>(sycl_device.allocate(
      out_arg_gpu.dimensions().TotalSize() * sizeof(bool)));
  bool* gpu_out_arg_data = static_cast<bool*>(sycl_device.allocate(
      out_arg_gpu.dimensions().TotalSize() * sizeof(bool)));
  DataType* gpu_out_data = (DataType*)sycl_device.allocate(sizeof(DataType));

  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_Arg1_gpu(
      gpu_in_arg1_data, tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_Arg2_gpu(
      gpu_in_arg2_data, tensorRange);
  TensorMap<Tensor<bool, 1, DataLayout, IndexType>> out_Argout_gpu(
      gpu_out_arg_data, argRange);
  TensorMap<Tensor<bool, 1, DataLayout, IndexType>> out_Argout_gpu_helper(
      gpu_out_arg__gpu_helper_data, argRange);
  TensorMap<Tensor<DataType, 0, DataLayout, IndexType>> out_gpu(gpu_out_data);

  // CPU reference.
  out_arg_cpu =
      (in_arg1.argmax(1) == in_arg2.argmax(1))
          .select(out_arg_cpu.constant(true), out_arg_cpu.constant(false));
  full_redux = (out_arg_cpu.template cast<float>()).mean();

  // Same computation on the SYCL device.
  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.memcpyHostToDevice(
      gpu_in_arg1_data, in_arg1.data(),
      in_arg1.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.memcpyHostToDevice(
      gpu_in_arg2_data, in_arg2.data(),
      in_arg2.dimensions().TotalSize() * sizeof(DataType));
  out_Argout_gpu_helper.device(sycl_device) =
      (in_Arg1_gpu.argmax(1) == in_Arg2_gpu.argmax(1));
  out_Argout_gpu.device(sycl_device) =
      (out_Argout_gpu_helper)
          .select(out_Argout_gpu.constant(true),
                  out_Argout_gpu.constant(false));
  out_gpu.device(sycl_device) =
      (out_Argout_gpu.template cast<float>()).mean();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  std::cout << "SYCL : " << full_redux_gpu() << " , CPU : " << full_redux()
            << "\n";
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_in_arg1_data);
  sycl_device.deallocate(gpu_in_arg2_data);
  sycl_device.deallocate(gpu_out_arg__gpu_helper_data);
  sycl_device.deallocate(gpu_out_arg_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_mean_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;

  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.mean();

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

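// Mean reduction over a 1D tensor with an odd element count (8707), which
// exercises SYCL work-group decompositions that are not a power of two.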
template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_mean_with_odd_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 1, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType n_elems = 8707;
  array<IndexType, 1> tensor_range = {{n_elems}};

  data_tensor in(tensor_range);
  DataType full_redux;
  DataType full_redux_gpu;
  TensorMap<scalar_tensor> red_cpu(&full_redux);
  TensorMap<scalar_tensor> red_gpu(&full_redux_gpu);

  const DataType const_val = static_cast<DataType>(0.6391);
  in = in.constant(const_val);

  Eigen::IndexList<Eigen::type2index<0>> red_axis;
  red_cpu = in.reduce(red_axis, Eigen::internal::MeanReducer<DataType>());
  VERIFY_IS_APPROX(const_val, red_cpu());

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data, tensor_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) =
      in_gpu.reduce(red_axis, Eigen::internal::MeanReducer<DataType>());
  sycl_device.memcpyDeviceToHost(red_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu, full_redux);

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

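// Full minimum reduction over a 2D tensor, followed by an offset variant that
// plants a sentinel minimum outside the reduced range.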
template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_min_sycl(const Eigen::SyclDevice& sycl_device) {
  const IndexType num_rows = 876;
  const IndexType num_cols = 953;
  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;

  in.setRandom();

  full_redux = in.minimum();

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = (DataType*)sycl_device.allocate(sizeof(DataType));

  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 0, DataLayout, IndexType>> out_gpu(gpu_out_data);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.minimum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));
  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_full_reductions_min_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using scalar_tensor = Tensor<DataType, 0, DataLayout, IndexType>;
  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);

  data_tensor in(tensor_range);
  scalar_tensor full_redux;
  scalar_tensor full_redux_gpu;

  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Plant a minimum in the part of the tensor that is excluded by the offset;
  // it must not show up in the reduced result.
  in(0) = static_cast<DataType>(-2);

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  full_redux = in_offset.minimum();
  VERIFY_IS_NOT_EQUAL(full_redux(), in(0));

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<scalar_tensor> out_gpu(gpu_out_data);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.minimum();
  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data,
                                 sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

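// Partial reductions start here: maximum along the first dimension of a 3D
// tensor, compared element-wise between host and device.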
template <typename DataType, int DataLayout, typename IndexType>
void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 145;
  IndexType dim_y = 1;
  IndexType dim_z = 67;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;
  array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();

  redux = in.maximum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_first_dim_reductions_max_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using reduced_tensor = Tensor<DataType, 1, DataLayout, IndexType>;

  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  array<IndexType, 1> reduced_range = {{num_cols}};
  const IndexType n_elems = internal::array_prod(tensor_range);
  const IndexType n_reduced = num_cols;

  data_tensor in(tensor_range);
  reduced_tensor redux;
  reduced_tensor redux_gpu(reduced_range);

  in.setRandom();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Write a maximum into the first row; it is excluded by the offset below and
  // therefore must not appear in the reduced result.
  for (IndexType i = 0; i < n_reduced; i++) {
    in(i) = static_cast<DataType>(2);
  }

  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;

  const IndexType offset = 64;
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  redux = in_offset.maximum(red_axis);
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_NOT_EQUAL(redux(i), in(i));
  }

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(
      sycl_device.allocate(n_reduced * sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<reduced_tensor> out_gpu(gpu_out_data, reduced_range);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data,
                                 n_reduced * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_APPROX(redux_gpu(i), redux(i));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_last_dim_reductions_max_with_offset_sycl(
    const Eigen::SyclDevice& sycl_device) {
  using data_tensor = Tensor<DataType, 2, DataLayout, IndexType>;
  using reduced_tensor = Tensor<DataType, 1, DataLayout, IndexType>;

  const IndexType num_rows = 64;
  const IndexType num_cols = 64;
  array<IndexType, 2> tensor_range = {{num_rows, num_cols}};
  array<IndexType, 1> full_reduced_range = {{num_rows}};
  array<IndexType, 1> reduced_range = {{num_rows - 1}};
  const IndexType n_elems = internal::array_prod(tensor_range);
  const IndexType n_reduced = reduced_range[0];

  data_tensor in(tensor_range);
  reduced_tensor redux(full_reduced_range);
  reduced_tensor redux_gpu(reduced_range);

  in.setRandom();
  redux.setZero();
  array<IndexType, 2> tensor_offset_range(tensor_range);
  tensor_offset_range[0] -= 1;
  // Write a maximum into the first elements; they are excluded by the offset
  // below and must not appear in the reduced result.
  for (IndexType i = 0; i < n_reduced; i++) {
    in(i) = static_cast<DataType>(2);
  }

  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 1;

  const IndexType offset = 64;
  // Introduce an offset in both the input and the output.
  TensorMap<data_tensor> in_offset(in.data() + offset, tensor_offset_range);
  TensorMap<reduced_tensor> red_offset(redux.data() + 1, reduced_range);
  red_offset = in_offset.maximum(red_axis);

  // The element before the output offset must stay untouched, and the planted
  // maxima must not have leaked into the reduced values.
  VERIFY_IS_EQUAL(redux(0), static_cast<DataType>(0));
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_NOT_EQUAL(red_offset(i), in(i));
  }

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(n_elems * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(
      sycl_device.allocate((n_reduced + 1) * sizeof(DataType)));

  TensorMap<data_tensor> in_gpu(gpu_in_data + offset, tensor_offset_range);
  TensorMap<reduced_tensor> out_gpu(gpu_out_data + 1, reduced_range);
  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),
                                 n_elems * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
  sycl_device.memcpyDeviceToHost(redux_gpu.data(), out_gpu.data(),
                                 n_reduced * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType i = 0; i < n_reduced; i++) {
    VERIFY_IS_APPROX(redux_gpu(i), red_offset(i));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

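// Sum reduction along the first dimension of a 2D tensor whose sizes are
// supplied by the caller, so several problem shapes can share one test body.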
template <typename DataType, int DataLayout, typename IndexType>
void test_first_dim_reductions_sum_sycl(
    const Eigen::SyclDevice& sycl_device, IndexType dim_x, IndexType dim_y) {
  array<IndexType, 2> tensorRange = {{dim_x, dim_y}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;
  array<IndexType, 1> reduced_tensorRange = {{dim_y}};

  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 1, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 1, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();

  redux = in.sum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType i = 0; i < redux.size(); i++) {
    VERIFY_IS_APPROX(redux_gpu.data()[i], redux.data()[i]);
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_first_dim_reductions_mean_sycl(
    const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 145;
  IndexType dim_y = 1;
  IndexType dim_z = 67;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 0;
  array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();

  redux = in.mean(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_last_dim_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 64;
  IndexType dim_y = 1;
  IndexType dim_z = 32;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 2;
  array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();

  redux = in.mean(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.mean(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType dim_x = 64;
  IndexType dim_y = 1;
  IndexType dim_z = 32;

  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
  Eigen::array<IndexType, 1> red_axis;
  red_axis[0] = 2;
  array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}};

  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);

  in.setRandom();

  redux = in.sum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(in.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> in_gpu(gpu_in_data,
                                                               tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> out_gpu(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in.data(), in.dimensions().TotalSize() * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu.data(), gpu_out_data,
      redux_gpu.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++)
    for (IndexType k = 0; k < reduced_tensorRange[1]; k++)
      VERIFY_IS_APPROX(redux_gpu(j, k), redux(j, k));

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

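// The following tests use fixed-size tensors (TensorFixedSize), so all
// dimensions are known at compile time.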
template <typename DataType, int DataLayout, typename IndexType>
void test_last_reductions_sum_sycl(const Eigen::SyclDevice& sycl_device) {
  auto tensorRange = Sizes<64, 64>(64, 64);
  Eigen::IndexList<Eigen::type2index<1>> red_axis;
  auto reduced_tensorRange = Sizes<64>(64);
  TensorFixedSize<DataType, Sizes<64, 64>, DataLayout> in_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_gpu_fix;

  in_fix.setRandom();

  redux_fix = in_fix.sum(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(
      in_fix.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<TensorFixedSize<DataType, Sizes<64, 64>, DataLayout>> in_gpu_fix(
      gpu_in_data, tensorRange);
  TensorMap<TensorFixedSize<DataType, Sizes<64>, DataLayout>> out_gpu_fix(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in_fix.data(),
      in_fix.dimensions().TotalSize() * sizeof(DataType));
  out_gpu_fix.device(sycl_device) = in_gpu_fix.sum(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu_fix.data(), gpu_out_data,
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType));

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++) {
    VERIFY_IS_APPROX(redux_gpu_fix(j), redux_fix(j));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
void test_last_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) {
  auto tensorRange = Sizes<64, 64>(64, 64);
  Eigen::IndexList<Eigen::type2index<1>> red_axis;
  auto reduced_tensorRange = Sizes<64>(64);
  TensorFixedSize<DataType, Sizes<64, 64>, DataLayout> in_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_fix;
  TensorFixedSize<DataType, Sizes<64>, DataLayout> redux_gpu_fix;

  in_fix.setRandom();

  redux_fix = in_fix.mean(red_axis);

  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(
      in_fix.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<TensorFixedSize<DataType, Sizes<64, 64>, DataLayout>> in_gpu_fix(
      gpu_in_data, tensorRange);
  TensorMap<TensorFixedSize<DataType, Sizes<64>, DataLayout>> out_gpu_fix(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, in_fix.data(),
      in_fix.dimensions().TotalSize() * sizeof(DataType));
  out_gpu_fix.device(sycl_device) = in_gpu_fix.mean(red_axis);
  sycl_device.memcpyDeviceToHost(
      redux_gpu_fix.data(), gpu_out_data,
      redux_gpu_fix.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.synchronize();

  // Check that the CPU and GPU reductions return the same result.
  for (IndexType j = 0; j < reduced_tensorRange[0]; j++) {
    VERIFY_IS_APPROX(redux_gpu_fix(j), redux_fix(j));
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

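// SYCL reductions also support an accumulator type that differs from the input
// type. The reducer below is a minimal sketch of such a custom reducer (its
// exact body is illustrative): it squares each input and accumulates the
// result in OutT.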
template <typename InT, typename OutT>
struct CustomReducer {
  static constexpr bool PacketAccess = false;
  static constexpr bool IsStateful = false;
  // Accumulate the squared input into an accumulator of a different type.
  EIGEN_DEVICE_FUNC void reduce(const InT x, OutT* accum) const {
    *accum += static_cast<OutT>(x * x);
  }
  // Combine two partial accumulations.
  EIGEN_DEVICE_FUNC void reduce(const OutT x, OutT* accum) const { *accum += x; }
  EIGEN_DEVICE_FUNC OutT initialize() const { return static_cast<OutT>(0); }
  EIGEN_DEVICE_FUNC OutT finalize(const OutT accum) const { return accum; }
};

template <typename DataType, typename AccumType, int DataLayout,
          typename IndexType>
void test_full_reductions_custom_sycl(const Eigen::SyclDevice& sycl_device) {
  constexpr IndexType InSize = 64;
  auto tensorRange = Sizes<InSize>(InSize);
  Eigen::IndexList<Eigen::type2index<0>> dims;
  auto reduced_tensorRange = Sizes<>();
  TensorFixedSize<DataType, Sizes<InSize>, DataLayout> in_fix;
  TensorFixedSize<AccumType, Sizes<>, DataLayout> redux_gpu_fix;
  CustomReducer<DataType, AccumType> reducer;

  in_fix.setRandom();
  // Host reference computed with the same reducer.
  AccumType expected = reducer.initialize();
  for (IndexType i = 0; i < InSize; i++) reducer.reduce(in_fix(i), &expected);
  expected = reducer.finalize(expected);

  size_t in_size_bytes = in_fix.dimensions().TotalSize() * sizeof(DataType);
  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(in_size_bytes));
  AccumType* gpu_out_data =
      static_cast<AccumType*>(sycl_device.allocate(sizeof(AccumType)));

  TensorMap<TensorFixedSize<DataType, Sizes<InSize>, DataLayout>> in_gpu_fix(
      gpu_in_data, tensorRange);
  TensorMap<TensorFixedSize<AccumType, Sizes<>, DataLayout>> out_gpu_fix(
      gpu_out_data, reduced_tensorRange);

  sycl_device.memcpyHostToDevice(gpu_in_data, in_fix.data(), in_size_bytes);
  out_gpu_fix.device(sycl_device) = in_gpu_fix.reduce(dims, reducer);
  sycl_device.memcpyDeviceToHost(redux_gpu_fix.data(), gpu_out_data,
                                 sizeof(AccumType));
  VERIFY_IS_EQUAL(*redux_gpu_fix.data(), expected);

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

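// Per-device drivers: each instantiates the tests above for both row-major and
// column-major layouts and then synchronizes the device.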
template <typename DataType, typename Dev>
void sycl_reduction_test_full_per_device(const Dev& sycl_device) {
  test_full_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_full_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_full_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device);

  test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_full_reductions_custom_sycl<DataType, int, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_custom_sycl<DataType, int, ColMajor, int64_t>(
      sycl_device);
  sycl_device.synchronize();
}

template <typename DataType, typename Dev>
void sycl_reduction_full_offset_per_device(const Dev& sycl_device) {
  test_full_reductions_sum_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_sum_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_min_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_min_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_max_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_max_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_mean_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_full_reductions_mean_with_offset_sycl<DataType, ColMajor, int64_t>(
      sycl_device);
  test_full_reductions_mean_with_odd_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  sycl_device.synchronize();
}

template <typename DataType, typename Dev>
void sycl_reduction_test_first_dim_per_device(const Dev& sycl_device) {
  // The problem sizes below are representative (one large and one small case).
  test_first_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device,
                                                                  4197, 4097);
  test_first_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device,
                                                                  4197, 4097);
  test_first_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device,
                                                                  129, 8);
  test_first_dim_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_first_dim_reductions_max_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  sycl_device.synchronize();
}

template <typename DataType, typename Dev>
void sycl_reduction_test_last_dim_per_device(const Dev& sycl_device) {
  test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_last_dim_reductions_max_with_offset_sycl<DataType, RowMajor, int64_t>(
      sycl_device);
  test_last_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_last_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_last_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_last_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device);
  sycl_device.synchronize();
}

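// Test entry point: run every reduction sub-test on each supported SYCL
// device.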
EIGEN_DECLARE_TEST(cxx11_tensor_reduction_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    std::cout << "Running on "
              << device.template get_info<cl::sycl::info::device::name>()
              << std::endl;
    QueueInterface queueInterface(device);
    auto sycl_device = Eigen::SyclDevice(&queueInterface);
    CALL_SUBTEST_1(sycl_reduction_test_full_per_device<float>(sycl_device));
    CALL_SUBTEST_2(sycl_reduction_full_offset_per_device<float>(sycl_device));
    CALL_SUBTEST_3(
        sycl_reduction_test_first_dim_per_device<float>(sycl_device));
    CALL_SUBTEST_4(sycl_reduction_test_last_dim_per_device<float>(sycl_device));
  }
}