#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
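// test_simple_padding: pad a 2x3x5x7 tensor on the SYCL device and check
// every element of the padded result against the host input.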
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_padding(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  tensor.setRandom();
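  // Pad dimension 1 by (2 before, 1 after) and dimension 2 by (3 before,
  // 4 after); dimensions 0 and 3 are left untouched.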
  array<std::pair<IndexType, IndexType>, 4> paddings;
  paddings[0] = std::make_pair(0, 0);
  paddings[1] = std::make_pair(2, 1);
  paddings[2] = std::make_pair(3, 4);
  paddings[3] = std::make_pair(0, 0);
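  // Expected padded extents: 2, 3 + 2 + 1 = 6, 5 + 3 + 4 = 12, and 7.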
  IndexType padedSizeDim1 = 2;
  IndexType padedSizeDim2 = 6;
  IndexType padedSizeDim3 = 12;
  IndexType padedSizeDim4 = 7;
  array<IndexType, 4> padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}};
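  // Host tensor for the result, plus device buffers wrapped in TensorMaps so
  // the expression can be evaluated on the SYCL device.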
  Tensor<DataType, 4, DataLayout, IndexType> padded(padedtensorRange);

  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size() * sizeof(DataType)));
  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu2(gpu_data2, padedtensorRange);
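  // Upload the input, evaluate the pad expression on the device, and copy
  // the padded result back to the host.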
  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), tensor.size() * sizeof(DataType));
  gpu2.device(sycl_device) = gpu1.pad(paddings);
  sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2, padded.size() * sizeof(DataType));
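  // The interior region (j in [2, 5), k in [3, 8)) must hold the original
  // values; everything else is padding and must be zero.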
  for (IndexType i = 0; i < padedSizeDim1; ++i) {
    for (IndexType j = 0; j < padedSizeDim2; ++j) {
      for (IndexType k = 0; k < padedSizeDim3; ++k) {
        for (IndexType l = 0; l < padedSizeDim4; ++l) {
          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
            VERIFY_IS_EQUAL(padded(i, j, k, l), tensor(i, j - 2, k - 3, l));
          } else {
            VERIFY_IS_EQUAL(padded(i, j, k, l), 0.0f);
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
}
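// test_padded_expr: same padding, but composed with a reshape so the whole
// pad-then-reshape expression is evaluated in one pass on the device.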
template <typename DataType, int DataLayout, typename IndexType>
static void test_padded_expr(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  tensor.setRandom();
  array<std::pair<IndexType, IndexType>, 4> paddings;
  paddings[0] = std::make_pair(0, 0);
  paddings[1] = std::make_pair(2, 1);
  paddings[2] = std::make_pair(3, 4);
  paddings[3] = std::make_pair(0, 0);
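  // Flatten the padded 2x6x12x7 result into a 12x84 matrix
  // (12 * 84 == 2 * 6 * 12 * 7 == 1008 elements).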
  Eigen::DSizes<IndexType, 2> reshape_dims;
  reshape_dims[0] = 12;
  reshape_dims[1] = 84;
  Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims);

  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size() * sizeof(DataType)));
  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> gpu2(gpu_data2, reshape_dims);
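  // pad() and reshape() are chained into a single lazy expression, so the
  // device evaluates the composition without materializing the intermediate
  // padded tensor.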
  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), tensor.size() * sizeof(DataType));
  gpu2.device(sycl_device) = gpu1.pad(paddings).reshape(reshape_dims);
  sycl_device.memcpyDeviceToHost(result.data(), gpu_data2, result.size() * sizeof(DataType));
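  // Walk the padded 4-D index space and map each (i, j, k, l) back to a
  // position in the reshaped 2-D result; the linearization depends on the
  // data layout.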
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 6; ++j) {
      for (IndexType k = 0; k < 12; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          const DataType result_value = DataLayout == ColMajor
              ? result(i + 2 * j, k + 12 * l)
              : result(j + 6 * i, l + 7 * k);
          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
            VERIFY_IS_EQUAL(result_value, tensor(i, j - 2, k - 3, l));
          } else {
            VERIFY_IS_EQUAL(result_value, 0.0f);
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
}
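// Run both tests for each data layout on the given device.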
template <typename DataType, typename dev_Selector>
void sycl_padding_test_per_device(dev_Selector s) {
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_padding<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_padding<DataType, ColMajor, int64_t>(sycl_device);
  test_padded_expr<DataType, RowMajor, int64_t>(sycl_device);
  test_padded_expr<DataType, ColMajor, int64_t>(sycl_device);
}
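// Entry point: exercise every SYCL device exposed by the runtime.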
EIGEN_DECLARE_TEST(cxx11_tensor_padding_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_padding_test_per_device<float>(device));
  }
}