14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 21 #include <unsupported/Eigen/CXX11/Tensor> 23 template <
typename DataType,
int DataLayout,
typename IndexType>
41 DataType* gpu_in_data =
static_cast<DataType*
>(
43 DataType* gpu_out_data =
static_cast<DataType*
>(sycl_device.allocate(
51 sycl_device.memcpyHostToDevice(
52 gpu_in_data, tensor.
data(),
55 sycl_device.memcpyDeviceToHost(
56 reversed_tensor.
data(), gpu_out_data,
59 for (IndexType
i = 0;
i < 2; ++
i) {
60 for (IndexType
j = 0;
j < 3; ++
j) {
61 for (IndexType k = 0; k < 5; ++k) {
62 for (IndexType
l = 0;
l < 7; ++
l) {
64 reversed_tensor(
i, 2 -
j, 4 - k,
l));
75 sycl_device.memcpyDeviceToHost(
76 reversed_tensor.
data(), gpu_out_data,
79 for (IndexType
i = 0;
i < 2; ++
i) {
80 for (IndexType
j = 0;
j < 3; ++
j) {
81 for (IndexType k = 0; k < 5; ++k) {
82 for (IndexType
l = 0;
l < 7; ++
l) {
94 sycl_device.memcpyDeviceToHost(
95 reversed_tensor.
data(), gpu_out_data,
98 for (IndexType
i = 0;
i < 2; ++
i) {
99 for (IndexType
j = 0;
j < 3; ++
j) {
100 for (IndexType k = 0; k < 5; ++k) {
101 for (IndexType
l = 0;
l < 7; ++
l) {
103 reversed_tensor(1 -
i,
j, k, 6 -
l));
109 sycl_device.deallocate(gpu_in_data);
110 sycl_device.deallocate(gpu_out_data);
113 template <
typename DataType,
int DataLayout,
typename IndexType>
133 DataType* gpu_in_data =
static_cast<DataType*
>(
135 DataType* gpu_out_data_expected =
static_cast<DataType*
>(sycl_device.allocate(
137 DataType* gpu_out_data_result =
static_cast<DataType*
>(
143 gpu_out_data_expected, tensorRange);
145 gpu_out_data_result, tensorRange);
147 sycl_device.memcpyHostToDevice(
148 gpu_in_data, tensor.
data(),
152 out_gpu_expected.
reverse(dim_rev).device(sycl_device) = in_gpu;
154 out_gpu_expected.
device(sycl_device) = in_gpu.
reverse(dim_rev);
156 sycl_device.memcpyDeviceToHost(
157 expected.
data(), gpu_out_data_expected,
161 src_slice_dim[0] = 2;
162 src_slice_dim[1] = 3;
163 src_slice_dim[2] = 1;
164 src_slice_dim[3] = 7;
166 src_slice_start[0] = 0;
167 src_slice_start[1] = 0;
168 src_slice_start[2] = 0;
169 src_slice_start[3] = 0;
173 for (IndexType
i = 0;
i < 5; ++
i) {
175 out_gpu_result.
slice(dst_slice_start, dst_slice_dim)
177 .device(sycl_device) = in_gpu.
slice(src_slice_start, src_slice_dim);
179 out_gpu_result.
slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
180 in_gpu.
slice(src_slice_start, src_slice_dim).reverse(dim_rev);
182 src_slice_start[2] += 1;
183 dst_slice_start[2] += 1;
185 sycl_device.memcpyDeviceToHost(
186 result.
data(), gpu_out_data_result,
191 for (IndexType k = 0; k < expected.
dimension(2); ++k) {
199 dst_slice_start[2] = 0;
201 sycl_device.memcpyHostToDevice(
202 gpu_out_data_result, result.
data(),
204 for (IndexType
i = 0;
i < 5; ++
i) {
206 out_gpu_result.
slice(dst_slice_start, dst_slice_dim)
208 .device(sycl_device) = in_gpu.
slice(dst_slice_start, dst_slice_dim);
210 out_gpu_result.
slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
211 in_gpu.
reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
213 dst_slice_start[2] += 1;
215 sycl_device.memcpyDeviceToHost(
216 result.
data(), gpu_out_data_result,
221 for (IndexType k = 0; k < expected.
dimension(2); ++k) {
230 template <
typename DataType>
232 QueueInterface queueInterface(d);
233 auto sycl_device = Eigen::SyclDevice(&queueInterface);
234 test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
235 test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
236 test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device,
false);
237 test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device,
false);
238 test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device,
true);
239 test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device,
true);
242 for (
const auto& device : Eigen::get_sycl_supported_devices()) {
243 std::cout <<
"Running on " 247 CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device));
248 #ifdef EIGEN_SYCL_DOUBLE_SUPPORT
#define CALL_SUBTEST_4(FUNC)
void sycl_reverse_test_per_device(const cl::sycl::device &d)
#define CALL_SUBTEST_3(FUNC)
static void test_simple_reverse(const Eigen::SyclDevice &sycl_device)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl)
static const Line3 l(Rot3(), 1, 1)
#define VERIFY_IS_EQUAL(a, b)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const
#define CALL_SUBTEST_1(FUNC)
A tensor expression mapping an existing array of data.
static void test_expr_reverse(const Eigen::SyclDevice &sycl_device, bool LValue)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorSlicingOp< const StartIndices, const Sizes, const TensorMap< PlainObjectType, Options_, MakePointer_ > > slice(const StartIndices &startIndices, const Sizes &sizes) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
#define CALL_SUBTEST_5(FUNC)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReverseOp< const ReverseDimensions, const TensorMap< PlainObjectType, Options_, MakePointer_ > > reverse(const ReverseDimensions &rev) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const
#define CALL_SUBTEST_2(FUNC)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const