14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
17 #define EIGEN_USE_SYCL
20 #include <unsupported/Eigen/CXX11/Tensor>
// Fragment of the templated scan test (presumably test_sycl_cumsum, judging by
// the calls further down that pass matching template arguments -- the line
// carrying the function name is not part of this listing; confirm).
// Template parameters: DataType (element type), DataLayout (int layout tag)
// and IndexType (type of the size arguments and of the loop index).
// Visible behaviour: evaluates cumsum(consume_dim, exclusive) on a SYCL device
// and on the host, then walks the results element by element and prints any
// mismatch. Several statements are truncated; see the notes below.
25 template <
typename DataType,
int DataLayout,
typename IndexType>
27 IndexType k_size, IndexType n_size,
int consume_dim,
// Log the three sizes and the scan dimension being exercised.
30 std::cout <<
"Testing for (" << m_size <<
"," << k_size <<
"," << n_size
31 <<
" consume_dim : " << consume_dim <<
")" << std::endl;
// Device buffers for the input and the result. t_input_bytes and
// t_result_bytes are defined on lines that are not shown here.
41 DataType* gpu_data_in =
42 static_cast<DataType*
>(sycl_device.allocate(t_input_bytes));
43 DataType* gpu_data_out =
44 static_cast<DataType*
>(sycl_device.allocate(t_result_bytes));
// Tails of two declarations that pair each device pointer with tensorRange;
// the declared types and variable names are cut off (presumably the
// gpu_t_input / gpu_t_result objects used below -- confirm).
48 gpu_data_in, tensorRange);
50 gpu_data_out, tensorRange);
// Upload t_input to the input buffer. The output buffer is seeded with the
// same host data and the same byte count.
// NOTE(review): gpu_data_out was allocated with t_result_bytes but is filled
// using t_input_bytes -- confirm the two sizes are always equal.
51 sycl_device.memcpyHostToDevice(gpu_data_in, t_input.
data(), t_input_bytes);
52 sycl_device.memcpyHostToDevice(gpu_data_out, t_input.
data(), t_input_bytes);
// Scan evaluated through the SYCL device ...
54 gpu_t_result.device(sycl_device) = gpu_t_input.cumsum(consume_dim, exclusive);
// ... and the same expression evaluated on the host as the reference.
56 t_result = t_input.cumsum(consume_dim, exclusive);
// Copy the device result into t_result_gpu (the byte-count argument is on a
// missing line), then synchronize the device before reading it.
58 sycl_device.memcpyDeviceToHost(t_result_gpu.
data(), gpu_data_out,
60 sycl_device.synchronize();
// Element-wise comparison loop over t_result.size() entries. The condition
// starts with a std::fabs(...) cast to DataType; the rest of the expression
// and what it is compared against are not visible here.
62 for (IndexType
i = 0;
i < t_result.
size();
i++) {
63 if (
static_cast<DataType
>(
std::fabs(
static_cast<DataType
>(
// Report the offending index together with the host and device values.
71 std::cout <<
"mismatch detected at index " <<
i <<
" CPU : " << t_result(
i)
72 <<
" vs SYCL : " << t_result_gpu(
i) << std::endl;
// Release both device allocations made above.
75 sycl_device.deallocate(gpu_data_in);
76 sycl_device.deallocate(gpu_data_out);
// Fragment of a per-device test wrapper templated on DataType and Dev; the
// line with its name and parameter list is not part of this listing.
// It calls test_sycl_cumsum with int64_t indices, once for ColMajor and once
// for RowMajor, passing 2049, 1023, 127 followed by 0 (presumably the three
// sizes and consume_dim -- confirm against the full signature).
// NOTE(review): the final argument of each call is cut off, so whether this is
// the exclusive or the inclusive variant cannot be confirmed from this view.
79 template <
typename DataType,
typename Dev>
81 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
83 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
// Fragment of a per-device test wrapper templated on DataType and Dev; the
// line with its name and parameter list is not part of this listing.
// It calls test_sycl_cumsum with int64_t indices, once for ColMajor and once
// for RowMajor, passing 1023, 2049, 127 followed by 1 (presumably the three
// sizes and consume_dim -- confirm against the full signature).
// NOTE(review): the final argument of each call is cut off, so whether this is
// the exclusive or the inclusive variant cannot be confirmed from this view.
86 template <
typename DataType,
typename Dev>
88 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
90 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
// Fragment of a per-device test wrapper templated on DataType and Dev; the
// line with its name and parameter list is not part of this listing.
// It calls test_sycl_cumsum with int64_t indices, once for ColMajor and once
// for RowMajor, passing 1023, 127, 2049 followed by 2 (presumably the three
// sizes and consume_dim -- confirm against the full signature).
// NOTE(review): the final argument of each call is cut off, so whether this is
// the exclusive or the inclusive variant cannot be confirmed from this view.
93 template <
typename DataType,
typename Dev>
95 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
97 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
// Fragment of a per-device test wrapper templated on DataType and Dev; the
// line with its name and parameter list is not part of this listing.
// It calls test_sycl_cumsum with int64_t indices, once for ColMajor and once
// for RowMajor, passing 2049, 1023, 127 followed by 0 (presumably the three
// sizes and consume_dim -- confirm against the full signature).
// The visible arguments match an earlier wrapper in this file.
// NOTE(review): the two presumably differ only in the truncated final argument
// (exclusive vs inclusive) -- confirm.
100 template <
typename DataType,
typename Dev>
102 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
104 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
// Fragment of a per-device test wrapper templated on DataType and Dev; the
// line with its name and parameter list is not part of this listing.
// It calls test_sycl_cumsum with int64_t indices, once for ColMajor and once
// for RowMajor, passing 1023, 2049, 127 followed by 1 (presumably the three
// sizes and consume_dim -- confirm against the full signature).
// The visible arguments match an earlier wrapper in this file.
// NOTE(review): the two presumably differ only in the truncated final argument
// (exclusive vs inclusive) -- confirm.
107 template <
typename DataType,
typename Dev>
109 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
111 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
// Fragment of a per-device test wrapper templated on DataType and Dev; the
// line with its name and parameter list is not part of this listing.
// It calls test_sycl_cumsum with int64_t indices, once for ColMajor and once
// for RowMajor, passing 1023, 127, 2049 followed by 2 (presumably the three
// sizes and consume_dim -- confirm against the full signature).
// The visible arguments match an earlier wrapper in this file.
// NOTE(review): the two presumably differ only in the truncated final argument
// (exclusive vs inclusive) -- confirm.
114 template <
typename DataType,
typename Dev>
116 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
118 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
// Body of the test entry point (its header is not part of this listing).
// Walks every device returned by Eigen::get_sycl_supported_devices(), prints
// the device name, wraps the device in a QueueInterface and a SyclDevice, and
// runs the six float scan tests (exclusive and inclusive, dims 0 to 2) on it.
122 for (
const auto& device : Eigen::get_sycl_supported_devices()) {
123 std::cout <<
"Running on "
124 << device.template get_info<cl::sycl::info::device::name>()
// queueInterface is passed by address to SyclDevice, so it has to stay alive
// for as long as sycl_device is in use within this iteration.
126 QueueInterface queueInterface(device);
127 auto sycl_device = Eigen::SyclDevice(&queueInterface);
// Each call below ends in an extra ')': it is the tail of a wrapping
// expression whose opening is on a line not shown here (presumably a
// sub-test macro -- confirm).
129 sycl_scan_test_exclusive_dim0_per_device<float>(sycl_device));
131 sycl_scan_test_exclusive_dim1_per_device<float>(sycl_device));
133 sycl_scan_test_exclusive_dim2_per_device<float>(sycl_device));
135 sycl_scan_test_inclusive_dim0_per_device<float>(sycl_device));
137 sycl_scan_test_inclusive_dim1_per_device<float>(sycl_device));
139 sycl_scan_test_inclusive_dim2_per_device<float>(sycl_device));