#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
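
// Tests Eigen's Tensor reverse() expression on a SYCL device, for several
// scalar types and both storage orders.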

template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) {
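  // The input is a rank-4 tensor of shape 2x3x5x7 (the bounds used in the
  // verification loops below): `tensor` holds random host data,
  // `reversed_tensor` receives the device result, and `dim_rev` selects which
  // dimensions get reversed.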
  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType)));
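  // The device buffers are viewed as tensors so expressions can run on them.
  // (Declarations assumed to follow Eigen's usual TensorMap pattern.)
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, tensorRange);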

  sycl_device.memcpyHostToDevice(
      gpu_in_data, tensor.data(),
      (tensor.dimensions().TotalSize()) * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(
      reversed_tensor.data(), gpu_out_data,
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
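
  // First case: with dimensions 1 and 2 reversed, element (i, j, k, l) of the
  // input must equal element (i, 2 - j, 4 - k, l) of the reversed copy.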
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l),
                          reversed_tensor(i, 2 - j, 4 - k, l));
        }
      }
    }
  }

  // Second case: dim_rev has been updated to reverse a different set of
  // dimensions.
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(
      reversed_tensor.data(), gpu_out_data,
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          // Assumed check for this case: only dimension 0 reversed.
          VERIFY_IS_EQUAL(tensor(i, j, k, l),
                          reversed_tensor(1 - i, j, k, l));
        }
      }
    }
  }

  // Third case: dimensions 0 and 3 reversed (see the index check below).
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(
      reversed_tensor.data(), gpu_out_data,
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l),
                          reversed_tensor(1 - i, j, k, 6 - l));
        }
      }
    }
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, int DataLayout, typename IndexType>
static void test_expr_reverse(const Eigen::SyclDevice& sycl_device,
                              bool LValue) {
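  // Checks reverse() on both sides of the assignment: as an rvalue on the
  // input expression and, when LValue is true, as an lvalue on the output
  // expression, including in combination with slicing.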
  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data_expected = static_cast<DataType*>(sycl_device.allocate(
      expected.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data_result = static_cast<DataType*>(
      sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType)));
  // (in_gpu is assumed to be the matching TensorMap over gpu_in_data.)
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(
      gpu_out_data_expected, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(
      gpu_out_data_result, tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, tensor.data(),
      (tensor.dimensions().TotalSize()) * sizeof(DataType));

  // Build the reference result: reverse applied either on the output (lvalue)
  // or on the input (rvalue).
  if (LValue) {
    out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
  } else {
    out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
  }
  sycl_device.memcpyDeviceToHost(
      expected.data(), gpu_out_data_expected,
      expected.dimensions().TotalSize() * sizeof(DataType));

  array<IndexType, 4> src_slice_dim;
  src_slice_dim[0] = 2;
  src_slice_dim[1] = 3;
  src_slice_dim[2] = 1;
  src_slice_dim[3] = 7;
  array<IndexType, 4> src_slice_start;
  src_slice_start[0] = 0;
  src_slice_start[1] = 0;
  src_slice_start[2] = 0;
  src_slice_start[3] = 0;
  array<IndexType, 4> dst_slice_dim = src_slice_dim;
  array<IndexType, 4> dst_slice_start = src_slice_start;
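
  // The result tensor is filled slab by slab: five slices of thickness 1 along
  // dimension 2 are reversed and written to the matching destination slice.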
  for (IndexType i = 0; i < 5; ++i) {
    if (LValue) {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim)
          .reverse(dim_rev)
          .device(sycl_device) = in_gpu.slice(src_slice_start, src_slice_dim);
    } else {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
          in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
    }
    src_slice_start[2] += 1;
    dst_slice_start[2] += 1;
  }
  sycl_device.memcpyDeviceToHost(
      result.data(), gpu_out_data_result,
      result.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < expected.dimension(0); ++i) {
    for (IndexType j = 0; j < expected.dimension(1); ++j) {
      for (IndexType k = 0; k < expected.dimension(2); ++k) {
        for (IndexType l = 0; l < expected.dimension(3); ++l) {
          VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
        }
      }
    }
  }

  dst_slice_start[2] = 0;
  sycl_device.memcpyHostToDevice(
      gpu_out_data_result, result.data(),
      (result.dimensions().TotalSize()) * sizeof(DataType));
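  // Second pass over the same destination slices: the reverse is now applied
  // to the whole input before slicing (or the reversed output slice is the
  // lvalue), so the result must again match `expected`.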
  for (IndexType i = 0; i < 5; ++i) {
    if (LValue) {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim)
          .reverse(dim_rev)
          .device(sycl_device) = in_gpu.slice(dst_slice_start, dst_slice_dim);
    } else {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
          in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
    }
    dst_slice_start[2] += 1;
  }
  sycl_device.memcpyDeviceToHost(
      result.data(), gpu_out_data_result,
      result.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < expected.dimension(0); ++i) {
    for (IndexType j = 0; j < expected.dimension(1); ++j) {
      for (IndexType k = 0; k < expected.dimension(2); ++k) {
        for (IndexType l = 0; l < expected.dimension(3); ++l) {
          VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
        }
      }
    }
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data_expected);
  sycl_device.deallocate(gpu_out_data_result);
}

template <typename DataType>
void sycl_reverse_test_per_device(const cl::sycl::device& d) {
  QueueInterface queueInterface(d);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
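  // Each data type is exercised in both storage orders and with reverse() used
  // as an rvalue (false) and as an lvalue (true).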
  test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false);
  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false);
  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true);
  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true);
}

// Assumed test entry point, following Eigen's EIGEN_DECLARE_TEST convention.
EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    std::cout << "Running on "
              << device.get_info<cl::sycl::info::device::name>() << std::endl;
    CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device));
#ifdef EIGEN_SYCL_DOUBLE_SUPPORT