#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

// "main.h" is the Eigen test harness header (VERIFY_IS_EQUAL, CALL_SUBTEST, ...).
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::Tensor;
using Eigen::TensorMap;
template <typename DataType, typename IndexType>
static void test_simple_image_patch_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  array<IndexType, 4> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1}};
  Tensor<DataType, 4, ColMajor, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 4, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  tensor_col_major.setRandom();
  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  // Map the raw device allocations as tensors so expressions can run on the SYCL device.
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), tensor_col_major.size() * sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_col_major.size() * sizeof(DataType));
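
  // Single-pixel patches (1x1): extraction should reproduce the input exactly,
  // in both layouts.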
  array<IndexType, 5> patchColMajorTensorRange = {{sizeDim1, 1, 1, sizeDim2 * sizeDim3, sizeDim4}};
  Tensor<DataType, 5, ColMajor, IndexType> single_patch_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = single_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
  gpu_single_patch_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(1, 1);
  sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
  array<IndexType, 5> patchRowMajorTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 1, 1, sizeDim1}};
  Tensor<DataType, 5, RowMajor, IndexType> single_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = single_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
  gpu_single_patch_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(1, 1);
  sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
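
  // Compare every element of the 1x1 patch outputs against the input tensors.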
  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    // ColMajor
    if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
      std::cout << "Mismatch detected at index colmajor " << i << " : "
                << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i]
                << std::endl;
    }
    VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
    // RowMajor
    if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
      std::cout << "Mismatch detected at index row major " << i << " : "
                << tensor_row_major.data()[i] << " vs "
                << single_patch_row_major.data()[i] << std::endl;
    }
    VERIFY_IS_EQUAL(single_patch_row_major.data()[i], tensor_row_major.data()[i]);
    VERIFY_IS_EQUAL(tensor_col_major.data()[i],
                    tensor_row_major.data()[i]);
    VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
                    single_patch_row_major.data()[i]);
  }
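
  // Entire-image patches (3x5): each patch covers the whole image, with
  // zero padding at the borders.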
  // ColMajor
  patchColMajorTensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim2 * sizeDim3, sizeDim4}};
  Tensor<DataType, 5, ColMajor, IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
  patchTensorBuffSize = entire_image_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
  gpu_entire_image_patch_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(3, 5);
  sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
  // RowMajor
  patchRowMajorTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
  Tensor<DataType, 5, RowMajor, IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = entire_image_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
  gpu_entire_image_patch_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(3, 5);
  sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
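
  // Walk every patch position and compare against the (zero-padded) input values.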
  for (IndexType i = 0; i < 3; ++i) {
    for (IndexType j = 0; j < 5; ++j) {
      IndexType patchId = i + 3 * j;
      for (IndexType r = 0; r < 3; ++r) {
        for (IndexType c = 0; c < 5; ++c) {
          for (IndexType d = 0; d < 2; ++d) {
            for (IndexType b = 0; b < 7; ++b) {
              DataType expected_col_major = 0.0f;
              DataType expected_row_major = 0.0f;
              if (r - 1 + i >= 0 && c - 2 + j >= 0 && r - 1 + i < 3 && c - 2 + j < 5) {
                expected_col_major = tensor_col_major(d, r - 1 + i, c - 2 + j, b);
                expected_row_major = tensor_row_major(b, c - 2 + j, r - 1 + i, d);
              }
              // ColMajor
              if (entire_image_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId, b), expected_col_major);
              // RowMajor
              if (entire_image_patch_row_major(b, patchId, c, r, d) !=
                  expected_row_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d), expected_row_major);
            }
          }
        }
      }
    }
  }
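
  // 2x2 patches: 15 patches of size 2x2 over the 2x3x5x7 input.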
  // ColMajor
  patchColMajorTensorRange = {{sizeDim1, 2, 2, sizeDim2 * sizeDim3, sizeDim4}};
  Tensor<DataType, 5, ColMajor, IndexType> twod_patch_col_major(patchColMajorTensorRange);
  patchTensorBuffSize = twod_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
  gpu_twod_patch_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(2, 2);
  sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
  // RowMajor
  patchRowMajorTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 2, 2, sizeDim1}};
  Tensor<DataType, 5, RowMajor, IndexType> twod_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = twod_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
  gpu_twod_patch_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(2, 2);
  sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
  IndexType row_padding = 0;
  IndexType col_padding = 0;
  IndexType stride = 1;
  for (IndexType i = 0; i < 3; ++i) {
    for (IndexType j = 0; j < 5; ++j) {
      IndexType patchId = i + 3 * j;
      for (IndexType r = 0; r < 2; ++r) {
        for (IndexType c = 0; c < 2; ++c) {
          for (IndexType d = 0; d < 2; ++d) {
            for (IndexType b = 0; b < 7; ++b) {
              DataType expected_col_major = 0.0f;
              DataType expected_row_major = 0.0f;
              IndexType row_offset = r * stride + i - row_padding;
              IndexType col_offset = c * stride + j - col_padding;
              // ColMajor
              if (row_offset >= 0 && col_offset >= 0 &&
                  row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
                expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
              }
              if (twod_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId, b), expected_col_major);
              // RowMajor
              if (row_offset >= 0 && col_offset >= 0 &&
                  row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
                expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
              }
              if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_single_patch_col_major);
  sycl_device.deallocate(gpu_data_single_patch_row_major);
  sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
  sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
  sycl_device.deallocate(gpu_data_twod_patch_col_major);
  sycl_device.deallocate(gpu_data_twod_patch_row_major);
}
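
// Checks extract_image_patches with PADDING_VALID: a 2x2 kernel with stride 2
// over a 3x3 input, in both layouts.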
template <typename DataType, typename IndexType>
static void test_patch_padding_valid_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType input_depth = 3;
  IndexType input_rows = 3;
  IndexType input_cols = 3;
  IndexType input_batches = 1;
  IndexType ksize = 2;   // Rows and cols of the extracted patches.
  IndexType stride = 2;  // Only matching row/col strides are exercised here.

  array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
  array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
  Tensor<DataType, 4, ColMajor, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 4, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), tensor_col_major.size() * sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_col_major.size() * sizeof(DataType));
  // Initialize the input with incrementing values.
  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    tensor_col_major.data()[i] = i + 1;
  }
  // ColMajor
  array<IndexType, 5> patchColMajorTensorRange = {{input_depth, ksize, ksize, 1, input_batches}};
  Tensor<DataType, 5, ColMajor, IndexType> result_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = result_col_major.size() * sizeof(DataType);
  DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
  gpu_result_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
  sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
  // RowMajor
  array<IndexType, 5> patchRowMajorTensorRange = {{input_batches, 1, ksize, ksize, input_depth}};
  Tensor<DataType, 5, RowMajor, IndexType> result_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = result_row_major.size() * sizeof(DataType);
  DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
  gpu_result_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
  sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
  IndexType row_padding = 0;
  IndexType col_padding = 0;
  for (IndexType i = 0; (i + stride + ksize - 1) < input_rows; i += stride) {
    for (IndexType j = 0; (j + stride + ksize - 1) < input_cols; j += stride) {
      IndexType patchId = i + input_rows * j;
      for (IndexType r = 0; r < ksize; ++r) {
        for (IndexType c = 0; c < ksize; ++c) {
          for (IndexType d = 0; d < input_depth; ++d) {
            for (IndexType b = 0; b < input_batches; ++b) {
              DataType expected_col_major = 0.0f;
              DataType expected_row_major = 0.0f;
              IndexType row_offset = r + i - row_padding;
              IndexType col_offset = c + j - col_padding;
              if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
                expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
                expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
              }
              // ColMajor
              if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
              // RowMajor
              if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_result_col_major);
  sycl_device.deallocate(gpu_data_result_row_major);
}
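
// PADDING_VALID on a constant-valued input: every extracted value must equal
// the fill value (11.0f), so col-major and row-major results must agree.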
template <typename DataType, typename IndexType>
static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType input_depth = 1;
  IndexType input_rows = 5;
  IndexType input_cols = 5;
  IndexType input_batches = 2;
  IndexType ksize = 3;   // Rows and cols of the extracted patches.
  IndexType stride = 2;  // Only matching row/col strides are exercised here.

  array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
  array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
  Tensor<DataType, 4, ColMajor, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 4, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  // Fill the input with a single value directly on the device, then mirror it
  // back to the host tensors in both layouts.
  gpu_col_major.device(sycl_device) = gpu_col_major.constant(11.0f);
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor_col_major.data(), gpu_data_col_major, tensor_col_major.size() * sizeof(DataType));
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_row_major.size() * sizeof(DataType));
  // ColMajor
  array<IndexType, 5> patchColMajorTensorRange = {{input_depth, ksize, ksize, 4, input_batches}};
  Tensor<DataType, 5, ColMajor, IndexType> result_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = result_col_major.size() * sizeof(DataType);
  DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
  gpu_result_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
  sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
  // RowMajor
  array<IndexType, 5> patchRowMajorTensorRange = {{input_batches, 4, ksize, ksize, input_depth}};
  Tensor<DataType, 5, RowMajor, IndexType> result_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = result_row_major.size() * sizeof(DataType);
  DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
  gpu_result_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
  sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
  IndexType row_padding = 0;
  IndexType col_padding = 0;
  for (IndexType i = 0; (i + stride + ksize - 1) <= input_rows; i += stride) {
    for (IndexType j = 0; (j + stride + ksize - 1) <= input_cols; j += stride) {
      IndexType patchId = i + input_rows * j;
      for (IndexType r = 0; r < ksize; ++r) {
        for (IndexType c = 0; c < ksize; ++c) {
          for (IndexType d = 0; d < input_depth; ++d) {
            for (IndexType b = 0; b < input_batches; ++b) {
              DataType expected_col_major = 0.0f;
              DataType expected_row_major = 0.0f;
              IndexType row_offset = r + i - row_padding;
              IndexType col_offset = c + j - col_padding;
              if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
                expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
                expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
              }
              // ColMajor
              if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
              // RowMajor
              if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_result_col_major);
  sycl_device.deallocate(gpu_data_result_row_major);
}
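
// Checks extract_image_patches with PADDING_SAME: a 2x2 kernel with stride 2
// over a 4x2 input, in both layouts.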
template <typename DataType, typename IndexType>
static void test_patch_padding_same_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType input_depth = 3;
  IndexType input_rows = 4;
  IndexType input_cols = 2;
  IndexType input_batches = 1;
  IndexType ksize = 2;   // Rows and cols of the extracted patches.
  IndexType stride = 2;  // Only matching row/col strides are exercised here.

  array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
  array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
  Tensor<DataType, 4, ColMajor, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 4, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), tensor_col_major.size() * sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_col_major.size() * sizeof(DataType));
  // Initialize the input with incrementing values.
  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    tensor_col_major.data()[i] = i + 1;
  }
  // ColMajor
  array<IndexType, 5> patchColMajorTensorRange = {{input_depth, ksize, ksize, 2, input_batches}};
  Tensor<DataType, 5, ColMajor, IndexType> result_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = result_col_major.size() * sizeof(DataType);
  DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
  gpu_result_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
  sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
  // RowMajor
  array<IndexType, 5> patchRowMajorTensorRange = {{input_batches, 2, ksize, ksize, input_depth}};
  Tensor<DataType, 5, RowMajor, IndexType> result_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = result_row_major.size() * sizeof(DataType);
  DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
  gpu_result_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
  sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
  IndexType row_padding = 0;
  IndexType col_padding = 0;
  for (IndexType i = 0; (i + stride + ksize - 1) <= input_rows; i += stride) {
    for (IndexType j = 0; (j + stride + ksize - 1) <= input_cols; j += stride) {
      IndexType patchId = i + input_rows * j;
      for (IndexType r = 0; r < ksize; ++r) {
        for (IndexType c = 0; c < ksize; ++c) {
          for (IndexType d = 0; d < input_depth; ++d) {
            for (IndexType b = 0; b < input_batches; ++b) {
              DataType expected_col_major = 0.0f;
              DataType expected_row_major = 0.0f;
              IndexType row_offset = r * stride + i - row_padding;
              IndexType col_offset = c * stride + j - col_padding;
              if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
                expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
                expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
              }
              // ColMajor
              if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
              // RowMajor
              if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_result_col_major);
  sycl_device.deallocate(gpu_data_result_row_major);
}
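
// Repeats the simple patch tests on a 3D input, i.e. without a batch dimension.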
template <typename DataType, typename IndexType>
static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;

  array<IndexType, 3> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3}};
  array<IndexType, 3> tensorRowMajorRange = {{sizeDim3, sizeDim2, sizeDim1}};
  Tensor<DataType, 3, ColMajor, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 3, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  tensor_col_major.setRandom();
  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), tensor_col_major.size() * sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_row_major.size() * sizeof(DataType));
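
  // Single-pixel patches (1x1): output is 4D because there is no batch dimension.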
  array<IndexType, 4> patchColMajorTensorRange = {{sizeDim1, 1, 1, sizeDim2 * sizeDim3}};
  Tensor<DataType, 4, ColMajor, IndexType> single_patch_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = single_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
  gpu_single_patch_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(1, 1);
  sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
  array<IndexType, 4> patchRowMajorTensorRange = {{sizeDim2 * sizeDim3, 1, 1, sizeDim1}};
  Tensor<DataType, 4, RowMajor, IndexType> single_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = single_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
  gpu_single_patch_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(1, 1);
  sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    // ColMajor
    if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
      std::cout << "Mismatch detected at index " << i << " : "
                << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] << std::endl;
    }
    VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
    // RowMajor
    if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
      std::cout << "Mismatch detected at index " << i << " : "
                << tensor_row_major.data()[i] << " vs "
                << single_patch_row_major.data()[i] << std::endl;
    }
    VERIFY_IS_EQUAL(single_patch_row_major.data()[i], tensor_row_major.data()[i]);
    VERIFY_IS_EQUAL(tensor_col_major.data()[i],
                    tensor_row_major.data()[i]);
    VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
                    single_patch_row_major.data()[i]);
  }
  // Entire-image patches (3x5): ColMajor
  patchColMajorTensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim2 * sizeDim3}};
  Tensor<DataType, 4, ColMajor, IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
  patchTensorBuffSize = entire_image_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
  gpu_entire_image_patch_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(3, 5);
  sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
  // RowMajor
  patchRowMajorTensorRange = {{sizeDim2 * sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
  Tensor<DataType, 4, RowMajor, IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = entire_image_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
  gpu_entire_image_patch_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(3, 5);
  sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
  for (IndexType i = 0; i < 3; ++i) {
    for (IndexType j = 0; j < 5; ++j) {
      IndexType patchId = i + 3 * j;
      for (IndexType r = 0; r < 3; ++r) {
        for (IndexType c = 0; c < 5; ++c) {
          for (IndexType d = 0; d < 2; ++d) {
            DataType expected_col_major = 0.0f;
            DataType expected_row_major = 0.0f;
            if (r - 1 + i >= 0 && c - 2 + j >= 0 && r - 1 + i < 3 && c - 2 + j < 5) {
              expected_col_major = tensor_col_major(d, r - 1 + i, c - 2 + j);
              expected_row_major = tensor_row_major(c - 2 + j, r - 1 + i, d);
            }
            // ColMajor
            if (entire_image_patch_col_major(d, r, c, patchId) != expected_col_major) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j
                        << " r=" << r << " c=" << c << " d=" << d << std::endl;
            }
            VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId), expected_col_major);
            // RowMajor
            if (entire_image_patch_row_major(patchId, c, r, d) !=
                expected_row_major) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j
                        << " r=" << r << " c=" << c << " d=" << d << std::endl;
            }
            VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d), expected_row_major);
          }
        }
      }
    }
  }
  // 2x2 patches: ColMajor
  patchColMajorTensorRange = {{sizeDim1, 2, 2, sizeDim2 * sizeDim3}};
  Tensor<DataType, 4, ColMajor, IndexType> twod_patch_col_major(patchColMajorTensorRange);
  patchTensorBuffSize = twod_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
  gpu_twod_patch_col_major.device(sycl_device) = gpu_col_major.extract_image_patches(2, 2);
  sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
  // RowMajor
  patchRowMajorTensorRange = {{sizeDim2 * sizeDim3, 2, 2, sizeDim1}};
  Tensor<DataType, 4, RowMajor, IndexType> twod_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = twod_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
  gpu_twod_patch_row_major.device(sycl_device) = gpu_row_major.extract_image_patches(2, 2);
  sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
  IndexType row_padding = 0;
  IndexType col_padding = 0;
  IndexType stride = 1;
  for (IndexType i = 0; i < 3; ++i) {
    for (IndexType j = 0; j < 5; ++j) {
      IndexType patchId = i + 3 * j;
      for (IndexType r = 0; r < 2; ++r) {
        for (IndexType c = 0; c < 2; ++c) {
          for (IndexType d = 0; d < 2; ++d) {
            DataType expected_col_major = 0.0f;
            DataType expected_row_major = 0.0f;
            IndexType row_offset = r * stride + i - row_padding;
            IndexType col_offset = c * stride + j - col_padding;
            // ColMajor
            if (row_offset >= 0 && col_offset >= 0 &&
                row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
              expected_col_major = tensor_col_major(d, row_offset, col_offset);
            }
            if (twod_patch_col_major(d, r, c, patchId) != expected_col_major) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j
                        << " r=" << r << " c=" << c << " d=" << d << std::endl;
            }
            VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId), expected_col_major);
            // RowMajor
            if (row_offset >= 0 && col_offset >= 0 &&
                row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
              expected_row_major = tensor_row_major(col_offset, row_offset, d);
            }
            if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j
                        << " r=" << r << " c=" << c << " d=" << d << std::endl;
            }
            VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_single_patch_col_major);
  sycl_device.deallocate(gpu_data_single_patch_row_major);
  sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
  sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
  sycl_device.deallocate(gpu_data_twod_patch_col_major);
  sycl_device.deallocate(gpu_data_twod_patch_row_major);
}
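
// Patch extraction on layer shapes typical of imagenet-style benchmarks; each
// configuration resizes the tensors and re-runs the check.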
template <typename DataType, typename IndexType>
static void test_imagenet_patches_sycl(const Eigen::SyclDevice& sycl_device) {
  // First configuration: 128x128 images, depth 3, batch 16, 11x11 patches.
  IndexType sizeDim1 = 3;
  IndexType sizeDim2 = 128;
  IndexType sizeDim3 = 128;
  IndexType sizeDim4 = 16;
  array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, ColMajor, IndexType> l_in_col_major(tensorColMajorRange);
  l_in_col_major.setRandom();
  DataType* gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major, tensorColMajorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(), l_in_col_major.size() * sizeof(DataType));
  array<IndexType, 5> patchTensorRange = {{sizeDim1, 11, 11, sizeDim2 * sizeDim3, sizeDim4}};
  Tensor<DataType, 5, ColMajor, IndexType> l_out_col_major(patchTensorRange);
  size_t patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
  DataType* gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major, patchTensorRange);
  gpu_l_out_col_major.device(sycl_device) = gpu_l_in_col_major.extract_image_patches(11, 11);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
  // RowMajor
  patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 11, 11, sizeDim1}};
  Tensor<DataType, 5, RowMajor, IndexType> l_out_row_major(patchTensorRange);
  patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
  DataType* gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major.device(sycl_device) = gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
  for (IndexType b = 0; b < 16; ++b) {
    for (IndexType i = 0; i < 128; ++i) {
      for (IndexType j = 0; j < 128; ++j) {
        IndexType patchId = i + 128 * j;
        for (IndexType c = 0; c < 11; ++c) {
          for (IndexType r = 0; r < 11; ++r) {
            for (IndexType d = 0; d < 3; ++d) {
              DataType expected = 0.0f;
              if (r - 5 + i >= 0 && c - 5 + j >= 0 && r - 5 + i < 128 && c - 5 + j < 128) {
                expected = l_in_col_major(d, r - 5 + i, c - 5 + j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
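
  // Second configuration: 64x64 images, depth 16, batch 32, 9x9 patches.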
  sizeDim1 = 16;
  sizeDim2 = 64;
  sizeDim3 = 64;
  sizeDim4 = 32;
  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  l_in_col_major.resize(tensorColMajorRange);
  l_in_col_major.setRandom();
  gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major_resize1(gpu_data_l_in_col_major, tensorColMajorRange);

  patchTensorRange = {{sizeDim1, 9, 9, sizeDim2 * sizeDim3, sizeDim4}};
  l_out_col_major.resize(patchTensorRange);
  patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
  gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major_resize1(gpu_data_l_out_col_major, patchTensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(), l_in_col_major.size() * sizeof(DataType));
  gpu_l_out_col_major_resize1.device(sycl_device) = gpu_l_in_col_major_resize1.extract_image_patches(9, 9);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
  // RowMajor
  sycl_device.deallocate(gpu_data_l_out_row_major);
  patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 9, 9, sizeDim1}};
  l_out_row_major.resize(patchTensorRange);
  patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
  gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major_resize1(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major_resize1.device(sycl_device) = gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
  for (IndexType b = 0; b < 32; ++b) {
    for (IndexType i = 0; i < 64; ++i) {
      for (IndexType j = 0; j < 64; ++j) {
        IndexType patchId = i + 64 * j;
        for (IndexType c = 0; c < 9; ++c) {
          for (IndexType r = 0; r < 9; ++r) {
            for (IndexType d = 0; d < 16; ++d) {
              DataType expected = 0.0f;
              if (r - 4 + i >= 0 && c - 4 + j >= 0 && r - 4 + i < 64 && c - 4 + j < 64) {
                expected = l_in_col_major(d, r - 4 + i, c - 4 + j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
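
  // Third configuration: 16x16 images, depth 32, batch 32, 7x7 patches.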
  sizeDim1 = 32;
  sizeDim2 = 16;
  sizeDim3 = 16;
  sizeDim4 = 32;
  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  l_in_col_major.resize(tensorColMajorRange);
  l_in_col_major.setRandom();
  gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major_resize2(gpu_data_l_in_col_major, tensorColMajorRange);

  patchTensorRange = {{sizeDim1, 7, 7, sizeDim2 * sizeDim3, sizeDim4}};
  l_out_col_major.resize(patchTensorRange);
  patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
  gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major_resize2(gpu_data_l_out_col_major, patchTensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(), l_in_col_major.size() * sizeof(DataType));
  gpu_l_out_col_major_resize2.device(sycl_device) = gpu_l_in_col_major_resize2.extract_image_patches(7, 7);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
  // RowMajor
  sycl_device.deallocate(gpu_data_l_out_row_major);
  patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 7, 7, sizeDim1}};
  l_out_row_major.resize(patchTensorRange);
  patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
  gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major_resize2(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major_resize2.device(sycl_device) = gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
  for (IndexType b = 0; b < 32; ++b) {
    for (IndexType i = 0; i < 16; ++i) {
      for (IndexType j = 0; j < 16; ++j) {
        IndexType patchId = i + 16 * j;
        for (IndexType c = 0; c < 7; ++c) {
          for (IndexType r = 0; r < 7; ++r) {
            for (IndexType d = 0; d < 32; ++d) {
              DataType expected = 0.0f;
              if (r - 3 + i >= 0 && c - 3 + j >= 0 && r - 3 + i < 16 && c - 3 + j < 16) {
                expected = l_in_col_major(d, r - 3 + i, c - 3 + j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
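
  // Fourth configuration: 13x13 images, depth 64, batch 32, 3x3 patches.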
  sizeDim1 = 64;
  sizeDim2 = 13;
  sizeDim3 = 13;
  sizeDim4 = 32;
  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  l_in_col_major.resize(tensorColMajorRange);
  l_in_col_major.setRandom();
  gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major_resize3(gpu_data_l_in_col_major, tensorColMajorRange);

  patchTensorRange = {{sizeDim1, 3, 3, sizeDim2 * sizeDim3, sizeDim4}};
  l_out_col_major.resize(patchTensorRange);
  patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
  gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major_resize3(gpu_data_l_out_col_major, patchTensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(), l_in_col_major.size() * sizeof(DataType));
  gpu_l_out_col_major_resize3.device(sycl_device) = gpu_l_in_col_major_resize3.extract_image_patches(3, 3);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
  // RowMajor
  sycl_device.deallocate(gpu_data_l_out_row_major);
  patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 3, 3, sizeDim1}};
  l_out_row_major.resize(patchTensorRange);
  patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
  gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major_resize3(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major_resize3.device(sycl_device) = gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
  for (IndexType b = 0; b < 32; ++b) {
    for (IndexType i = 0; i < 13; ++i) {
      for (IndexType j = 0; j < 13; ++j) {
        IndexType patchId = i + 13 * j;
        for (IndexType c = 0; c < 3; ++c) {
          for (IndexType r = 0; r < 3; ++r) {
            for (IndexType d = 0; d < 64; ++d) {
              DataType expected = 0.0f;
              if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 13 && c - 1 + j < 13) {
                expected = l_in_col_major(d, r - 1 + i, c - 1 + j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                          << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
  sycl_device.deallocate(gpu_data_l_out_row_major);
}
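
// Runs the full image-patch test suite on a single SYCL device queue.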
template <typename DataType, typename dev_Selector>
void sycl_tensor_image_patch_test_per_device(dev_Selector s) {
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_image_patch_sycl<DataType, int64_t>(sycl_device);
  test_patch_padding_valid_sycl<DataType, int64_t>(sycl_device);
  test_patch_padding_valid_same_value_sycl<DataType, int64_t>(sycl_device);
  test_patch_padding_same_sycl<DataType, int64_t>(sycl_device);
  test_patch_no_extra_dim_sycl<DataType, int64_t>(sycl_device);
  test_imagenet_patches_sycl<DataType, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_tensor_image_patch_test_per_device<float>(device));
  }
}