#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
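
// Exercises TensorBase::extract_patches() on a SYCL device for four patch
// shapes (1x1x1x1, the full tensor, 1x2x2x1 and 1x2x3x5), in both RowMajor
// and ColMajor layouts. Results are copied back to the host and compared
// against the corresponding elements of the input tensor.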
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_patch_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
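  // Case 1: 1x1x1x1 patches, i.e. one patch per element. extract_patches()
  // appends the patch index as the last dimension for ColMajor and prepends
  // it for RowMajor, hence the two expected shapes below.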
  if (DataLayout == ColMajor) {
    patchTensorRange = {{1, 1, 1, 1, sizeDim1*sizeDim2*sizeDim3*sizeDim4}};
  } else {
    patchTensorRange = {{sizeDim1*sizeDim2*sizeDim3*sizeDim4, 1, 1, 1, 1}};
  }
  const size_t tensorBuffSize = tensor.size() * sizeof(DataType);
  size_t patchTensorBuffSize = no_patch.size() * sizeof(DataType);
  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  DataType* gpu_data_no_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
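  // Copy the input to the device, run extract_patches() there, then copy the
  // result back for verification on the host.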
  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
  gpu_no_patch.device(sycl_device) = gpu_tensor.extract_patches(patch_dims);
  sycl_device.memcpyDeviceToHost(no_patch.data(), gpu_data_no_patch, patchTensorBuffSize);
  // With 1x1x1x1 patches every element is its own patch, so the flattened
  // output must match the input element for element.
  for (int i = 0; i < tensor.size(); ++i) {
    VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]);
  }
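  // Case 2: a single patch whose dimensions equal the whole tensor.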
  if (DataLayout == ColMajor) {
    patchTensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, 1}};
  } else {
    patchTensorRange = {{1, sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  }
  patchTensorBuffSize = single_patch.size() * sizeof(DataType);
  DataType* gpu_data_single_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  gpu_single_patch.device(sycl_device) = gpu_tensor.extract_patches(patch_dims);
  sycl_device.memcpyDeviceToHost(single_patch.data(), gpu_data_single_patch, patchTensorBuffSize);
  // The single full-size patch must reproduce the input tensor exactly.
  for (int i = 0; i < tensor.size(); ++i) {
    VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]);
  }
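  // Case 3: 1x2x2x1 patches. A 2x2 window can start at 2*2*4*7 = 112
  // positions in the 2x3x5x7 input, so 112 patches are expected.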
  if (DataLayout == ColMajor) {
    patchTensorRange = {{1, 2, 2, 1, 2*2*4*7}};
  } else {
    patchTensorRange = {{2*2*4*7, 1, 2, 2, 1}};
  }
  patchTensorBuffSize = twod_patch.size() * sizeof(DataType);
  DataType* gpu_data_twod_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  gpu_twod_patch.device(sycl_device) = gpu_tensor.extract_patches(patch_dims);
  sycl_device.memcpyDeviceToHost(twod_patch.data(), gpu_data_twod_patch, patchTensorBuffSize);
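  // Walk every patch start position (i, j, k, l) and recompute its linear
  // patch index: patches are enumerated with the first position index varying
  // fastest for ColMajor and the last varying fastest for RowMajor.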
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 2; ++j) {
      for (int k = 0; k < 4; ++k) {
        for (int l = 0; l < 7; ++l) {
          int patch_loc;
          if (DataLayout == ColMajor) {
            patch_loc = i + 2 * (j + 2 * (k + 4 * l));
          } else {
            patch_loc = l + 7 * (k + 4 * (j + 2 * i));
          }
          for (int x = 0; x < 2; ++x) {
            for (int y = 0; y < 2; ++y) {
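              // Each element (x, y) of patch patch_loc is checked with
              // VERIFY_IS_EQUAL against the matching input element; the patch
              // index is the last dimension of twod_patch for ColMajor and
              // the first for RowMajor.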
  // Case 4: 1x2x3x5 patches, i.e. a 2x3x5 window over the last three
  // dimensions, giving 2*2*3*3 = 36 patch positions.
  if (DataLayout == ColMajor) {
    patchTensorRange = {{1, 2, 3, 5, 2*2*3*3}};
  } else {
    patchTensorRange = {{2*2*3*3, 1, 2, 3, 5}};
  }
  patchTensorBuffSize = threed_patch.size() * sizeof(DataType);
  DataType* gpu_data_threed_patch = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  gpu_threed_patch.device(sycl_device) = gpu_tensor.extract_patches(patch_dims);
  sycl_device.memcpyDeviceToHost(threed_patch.data(), gpu_data_threed_patch, patchTensorBuffSize);
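  // Same verification pattern as the 2-D case, now over 2*2*3*3 patch start
  // positions with a 2x3x5 window.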
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 2; ++j) {
      for (int k = 0; k < 3; ++k) {
        for (int l = 0; l < 3; ++l) {
          int patch_loc;
          if (DataLayout == ColMajor) {
            patch_loc = i + 2 * (j + 2 * (k + 3 * l));
          } else {
            patch_loc = l + 3 * (k + 3 * (j + 2 * i));
          }
          for (int x = 0; x < 2; ++x) {
            for (int y = 0; y < 3; ++y) {
              for (int z = 0; z < 5; ++z) {
  sycl_device.deallocate(gpu_data_tensor);
  sycl_device.deallocate(gpu_data_no_patch);
  sycl_device.deallocate(gpu_data_single_patch);
  sycl_device.deallocate(gpu_data_twod_patch);
  sycl_device.deallocate(gpu_data_threed_patch);
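
// Creates a SYCL queue for the given device selector and runs the patch test
// for both storage layouts.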
template <typename DataType, typename dev_Selector>
void sycl_tensor_patch_test_per_device(dev_Selector s) {
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_patch_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_patch_sycl<DataType, ColMajor, int64_t>(sycl_device);
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_tensor_patch_test_per_device<float>(device));
  }