cxx11_tensor_image_patch_sycl.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16 
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19 
20 #include "main.h"
21 #include <unsupported/Eigen/CXX11/Tensor>
22 
23 using Eigen::Tensor;
24 static const int DataLayout = ColMajor;
25 
26 template <typename DataType, typename IndexType>
27 static void test_simple_image_patch_sycl(const Eigen::SyclDevice& sycl_device)
28 {
29  IndexType sizeDim1 = 2;
30  IndexType sizeDim2 = 3;
31  IndexType sizeDim3 = 5;
32  IndexType sizeDim4 = 7;
33  array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
34  array<IndexType, 4> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1}};
35  Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
36  Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
37  tensor_col_major.setRandom();
38 
39  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
40  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
41  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
42  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
43 
44  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
45  gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
46  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
47 
48  VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
49  VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
50  VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
51  VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
52 
53  // Single pixel patch: ColMajor
54  array<IndexType, 5> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3, sizeDim4}};
55  Tensor<DataType, 5, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
56  size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
57  DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
58  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
59  gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
60  sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
61 
62  VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), 2);
63  VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
64  VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
65  VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), 3*5);
66  VERIFY_IS_EQUAL(single_patch_col_major.dimension(4), 7);
67 
68  // Single pixel patch: RowMajor
69  array<IndexType, 5> patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 1, 1, sizeDim1}};
70  Tensor<DataType, 5, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
71  patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
72  DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
73  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
74  gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
75  sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
76 
77  VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), 7);
78  VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 3*5);
79  VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
80  VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), 1);
81  VERIFY_IS_EQUAL(single_patch_row_major.dimension(4), 2);
82 
83  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
84  // ColMajor
85  if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
86  std::cout << "Mismatch detected at index colmajor " << i << " : "
87  << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i]
88  << std::endl;
89  }
90  VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
91  // RowMajor
92  if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
93  std::cout << "Mismatch detected at index row major" << i << " : "
94  << tensor_row_major.data()[i] << " vs "
95  << single_patch_row_major.data()[i] << std::endl;
96  }
97  VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
98  tensor_row_major.data()[i]);
99  VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
100  VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
101  single_patch_row_major.data()[i]);
102  }
103 
104 
105  // Entire image patch: ColMajor
106  patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3, sizeDim4}};
107  Tensor<DataType, 5, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
108  patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
109  DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
110  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
111  gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
112  sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
113 
114  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
115  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
116  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
117  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
118  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(4), 7);
119 
120  // Entire image patch: RowMajor
121  patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
122  Tensor<DataType, 5, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
123  patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
124  DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
125  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
126  gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
127  sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
128 
129  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7);
130  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5);
131  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5);
132  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3);
133  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2);
134 
135  for (IndexType i = 0; i < 3; ++i) {
136  for (IndexType j = 0; j < 5; ++j) {
137  IndexType patchId = i+3*j;
138  for (IndexType r = 0; r < 3; ++r) {
139  for (IndexType c = 0; c < 5; ++c) {
140  for (IndexType d = 0; d < 2; ++d) {
141  for (IndexType b = 0; b < 7; ++b) {
142  DataType expected_col_major = 0.0f;
143  DataType expected_row_major = 0.0f;
144  if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
145  expected_col_major = tensor_col_major(d, r-1+i, c-2+j, b);
146  expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d);
147  }
148  // ColMajor
149  if (entire_image_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
150  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
151  }
152  VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId, b), expected_col_major);
153  // RowMajor
154  if (entire_image_patch_row_major(b, patchId, c, r, d) !=
155  expected_row_major) {
156  std::cout << "Mismatch detected at index i=" << i << " j=" << j
157  << " r=" << r << " c=" << c << " d=" << d << " b=" << b
158  << std::endl;
159  }
160  VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d),
161  expected_row_major);
162  // Check that ColMajor and RowMajor agree.
163  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
164  }
165  }
166  }
167  }
168  }
169  }
170 
171  // 2D patch: ColMajor
172  patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3, sizeDim4}};
173  Tensor<DataType, 5, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
174  patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
175  DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
176  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
177  gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
178  sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
179 
180  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
181  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
182  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
183  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
184  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(4), 7);
185 
186  // 2D patch: RowMajor
187  patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 2, 2, sizeDim1}};
188  Tensor<DataType, 5, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
189  patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
190  DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
191  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
192  gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
193  sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
194 
195  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7);
196  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5);
197  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
198  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
199  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2);
200 
201 
202  // Based on the calculation described in TensorTraits.h, padding happens to be 0.
203  IndexType row_padding = 0;
204  IndexType col_padding = 0;
205  IndexType stride = 1;
206 
207  for (IndexType i = 0; i < 3; ++i) {
208  for (IndexType j = 0; j < 5; ++j) {
209  IndexType patchId = i+3*j;
210  for (IndexType r = 0; r < 2; ++r) {
211  for (IndexType c = 0; c < 2; ++c) {
212  for (IndexType d = 0; d < 2; ++d) {
213  for (IndexType b = 0; b < 7; ++b) {
214  DataType expected_col_major = 0.0f;
215  DataType expected_row_major = 0.0f;
216  IndexType row_offset = r*stride + i - row_padding;
217  IndexType col_offset = c*stride + j - col_padding;
218  // ColMajor
219  if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
220  expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
221  }
222  if (twod_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
223  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
224  }
225  VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId, b), expected_col_major);
226 
227  // RowMajor
228  if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
229  expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
230 
231  }
232  if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
233  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
234  }
235  VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
236  // Check that ColMajor and RowMajor agree.
237  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
238  }
239  }
240  }
241  }
242  }
243  }
244 
245  sycl_device.deallocate(gpu_data_col_major);
246  sycl_device.deallocate(gpu_data_row_major);
247  sycl_device.deallocate(gpu_data_single_patch_col_major);
248  sycl_device.deallocate(gpu_data_single_patch_row_major);
249  sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
250  sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
251  sycl_device.deallocate(gpu_data_twod_patch_col_major);
252  sycl_device.deallocate(gpu_data_twod_patch_row_major);
253 
254 }
255 
256 
257 // Verifies VALID padding (no padding) with incrementing values.
258 template <typename DataType, typename IndexType>
259 static void test_patch_padding_valid_sycl(const Eigen::SyclDevice& sycl_device){
260  IndexType input_depth = 3;
261  IndexType input_rows = 3;
262  IndexType input_cols = 3;
263  IndexType input_batches = 1;
264  IndexType ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
265  IndexType stride = 2; // Only same stride is supported.
266 
267  array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
268  array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
269  Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
270  Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
271 
272  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
273  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
274  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
275  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
276 
277  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
278  gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
279  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
280 
281  VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
282  VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
283  VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
284  VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
285 
286  // Initializes tensor with incrementing numbers.
287  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
288  tensor_col_major.data()[i] = i + 1;
289  }
290  // ColMajor
291  array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 1, input_batches}};
292  Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
293  size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
294  DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
295  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
296  gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
297  sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
298 
299  VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth
300  VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows
301  VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols
302  VERIFY_IS_EQUAL(result_col_major.dimension(3), 1); // number of patches
303  VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches
304 
305  // RowMajor
306  array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 1, ksize, ksize, input_depth }};
307  Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
308  patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
309  DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
310  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
311  gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
312  sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
313 
314  VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
315  VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
316  VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
317  VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
318  VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
319 
320  // No padding is carried out.
321  IndexType row_padding = 0;
322  IndexType col_padding = 0;
323 
324  for (IndexType i = 0; (i+stride+ksize-1) < input_rows; i += stride) { // input rows
325  for (IndexType j = 0; (j+stride+ksize-1) < input_cols; j += stride) { // input cols
326  IndexType patchId = i+input_rows*j;
327  for (IndexType r = 0; r < ksize; ++r) { // patch rows
328  for (IndexType c = 0; c < ksize; ++c) { // patch cols
329  for (IndexType d = 0; d < input_depth; ++d) { // depth
330  for (IndexType b = 0; b < input_batches; ++b) { // batch
331  DataType expected_col_major = 0.0f;
332  DataType expected_row_major = 0.0f;
333  IndexType row_offset = r + i - row_padding;
334  IndexType col_offset = c + j - col_padding;
335  if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
336  expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
337  expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
338  }
339  // ColMajor
340  if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
341  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
342  }
343  VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
344  // RowMajor
345  if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
346  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
347  }
348  VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
349  // Check that ColMajor and RowMajor agree.
350  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
351  }
352  }
353  }
354  }
355  }
356  }
357  sycl_device.deallocate(gpu_data_col_major);
358  sycl_device.deallocate(gpu_data_row_major);
359  sycl_device.deallocate(gpu_data_result_col_major);
360  sycl_device.deallocate(gpu_data_result_row_major);
361 }
362 
363 // Verifies VALID padding (no padding) with the same value.
364 template <typename DataType, typename IndexType>
365 static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice& sycl_device){
366  IndexType input_depth = 1;
367  IndexType input_rows = 5;
368  IndexType input_cols = 5;
369  IndexType input_batches = 2;
370  IndexType ksize = 3; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
371  IndexType stride = 2; // Only same stride is supported.
372  // ColMajor
373 
374  array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
375  array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
376  Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
377  Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
378 
379  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
380  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
381  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
382  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
383  gpu_col_major.device(sycl_device)=gpu_col_major.constant(11.0f);
384  gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
385  sycl_device.memcpyDeviceToHost(tensor_col_major.data(), gpu_data_col_major, (tensor_col_major.size())*sizeof(DataType));
386  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
387  VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
388  VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
389  VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
390  VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
391 
392  array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 4, input_batches}};
393  Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
394  size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
395  DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
396  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
397  gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
398  sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
399 
400  VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth
401  VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows
402  VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols
403  VERIFY_IS_EQUAL(result_col_major.dimension(3), 4); // number of patches
404  VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches
405 
406  // RowMajor
407  array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 4, ksize, ksize, input_depth }};
408  Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
409  patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
410  DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
411  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
412  gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
413  sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
414 
415  VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
416  VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
417  VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
418  VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
419  VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
420 
421  // No padding is carried out.
422  IndexType row_padding = 0;
423  IndexType col_padding = 0;
424 
425  for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
426  for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
427  IndexType patchId = i+input_rows*j;
428  for (IndexType r = 0; r < ksize; ++r) { // patch rows
429  for (IndexType c = 0; c < ksize; ++c) { // patch cols
430  for (IndexType d = 0; d < input_depth; ++d) { // depth
431  for (IndexType b = 0; b < input_batches; ++b) { // batch
432  DataType expected_col_major = 0.0f;
433  DataType expected_row_major = 0.0f;
434  IndexType row_offset = r + i - row_padding;
435  IndexType col_offset = c + j - col_padding;
436  if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
437  expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
438  expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
439  }
440  // ColMajor
441  if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
442  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
443  }
444  VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
445  // RowMajor
446  if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
447  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
448  }
449  VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
450  // Check that ColMajor and RowMajor agree.
451  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
452  }
453  }
454  }
455  }
456  }
457  }
458 }
459 
460 // Verifies SAME padding.
461 template <typename DataType, typename IndexType>
462 static void test_patch_padding_same_sycl(const Eigen::SyclDevice& sycl_device){
463  IndexType input_depth = 3;
464  IndexType input_rows = 4;
465  IndexType input_cols = 2;
466  IndexType input_batches = 1;
467  IndexType ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
468  IndexType stride = 2; // Only same stride is supported.
469 
470  // ColMajor
471  array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
472  array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
473  Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
474  Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
475 
476  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
477  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
478  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
479  TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
480 
481  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
482  gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
483  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
484 
485  VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
486  VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
487  VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
488  VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
489 
490  // Initializes tensor with incrementing numbers.
491  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
492  tensor_col_major.data()[i] = i + 1;
493  }
494 
495 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 2, input_batches}};
496 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
497 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
498 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
499 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
500 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
501 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
502 
503 
504  VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth
505  VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows
506  VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols
507  VERIFY_IS_EQUAL(result_col_major.dimension(3), 2); // number of patches
508  VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches
509 
510  // RowMajor
511 
512  array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 2, ksize, ksize, input_depth }};
513  Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
514  patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
515  DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
516  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
517  gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
518  sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
519 
520  VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
521  VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
522  VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
523  VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
524  VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
525 
526  // Based on the calculation described in TensorTraits.h, padding happens to be 0.
527  IndexType row_padding = 0;
528  IndexType col_padding = 0;
529 
530  for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
531  for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
532  IndexType patchId = i+input_rows*j;
533  for (IndexType r = 0; r < ksize; ++r) { // patch rows
534  for (IndexType c = 0; c < ksize; ++c) { // patch cols
535  for (IndexType d = 0; d < input_depth; ++d) { // depth
536  for (IndexType b = 0; b < input_batches; ++b) { // batch
537  DataType expected_col_major = 0.0f;
538  DataType expected_row_major = 0.0f;
539  IndexType row_offset = r*stride + i - row_padding;
540  IndexType col_offset = c*stride + j - col_padding;
541  if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
542  expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
543  expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
544  }
545  // ColMajor
546  if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
547  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
548  }
549  VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
550  // RowMajor
551  if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
552  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
553  }
554  VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
555  // Check that ColMajor and RowMajor agree.
556  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
557  }
558  }
559  }
560  }
561  }
562  }
563 }
564 
565 
566 template <typename DataType, typename IndexType>
567 static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice& sycl_device){
568 
569  IndexType sizeDim1 = 2;
570  IndexType sizeDim2 = 3;
571  IndexType sizeDim3 = 5;
572 
573  // ColMajor
574  array<IndexType, 3> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3}};
575  array<IndexType, 3> tensorRowMajorRange = {{sizeDim3, sizeDim2, sizeDim1}};
576  Tensor<DataType, 3, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
577  tensor_col_major.setRandom();
578  Tensor<DataType, 3, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
579 
580  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
581  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
582  TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
583  TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
584 
585  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
586  gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
587  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
588 
589  VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(2));
590  VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(1));
591  VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(0));
592 
593 
594  // Single pixel patch: ColMajor
595  array<IndexType, 4> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3}};
596  Tensor<DataType, 4, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
597  size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
598  DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
599  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
600  gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
601  sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
602 
603  VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), sizeDim1);
604  VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
605  VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
606  VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), sizeDim2*sizeDim3);
607 
608  // Single pixel patch: RowMajor
609  array<IndexType, 4> patchRowMajorTensorRange={{sizeDim2*sizeDim3, 1, 1, sizeDim1}};
610  Tensor<DataType, 4, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
611  patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
612  DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
613  TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
614  gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
615  sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
616 
617  VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), sizeDim2*sizeDim3);
618  VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 1);
619  VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
620  VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), sizeDim1);
621 
622  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
623  // ColMajor
624  if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
625  std::cout << "Mismatch detected at index " << i << " : " << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] << std::endl;
626  }
627  VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
628  // RowMajor
629  if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
630  std::cout << "Mismatch detected at index " << i << " : "
631  << tensor_col_major.data()[i] << " vs "
632  << single_patch_row_major.data()[i] << std::endl;
633  }
634  VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
635  tensor_row_major.data()[i]);
636  VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
637  VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
638  single_patch_row_major.data()[i]);
639  }
640 
641  // Entire image patch: ColMajor
642  patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3}};
643  Tensor<DataType, 4, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
644  patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
645  DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
646  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
647  gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
648  sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
649 
650  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
651  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
652  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
653  VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
654 
655  // Entire image patch: RowMajor
656 patchRowMajorTensorRange={{sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
657 Tensor<DataType, 4, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
658 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
659 DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
660 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
661 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
662 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
663  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5);
664  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5);
665  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3);
666  VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2);
667 
668  for (IndexType i = 0; i < 3; ++i) {
669  for (IndexType j = 0; j < 5; ++j) {
670  IndexType patchId = i+3*j;
671  for (IndexType r = 0; r < 3; ++r) {
672  for (IndexType c = 0; c < 5; ++c) {
673  for (IndexType d = 0; d < 2; ++d) {
674  DataType expected_col_major = 0.0f;
675  DataType expected_row_major = 0.0f;
676  if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
677  expected_col_major = tensor_col_major(d, r-1+i, c-2+j);
678  expected_row_major = tensor_row_major(c-2+j, r-1+i, d);
679  }
680  // ColMajor
681  if (entire_image_patch_col_major(d, r, c, patchId) != expected_col_major) {
682  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
683  }
684  VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId), expected_col_major);
685  // RowMajor
686  if (entire_image_patch_row_major(patchId, c, r, d) !=
687  expected_row_major) {
688  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
689  }
690  VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d),
691  expected_row_major);
692  // Check that ColMajor and RowMajor agree.
693  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
694  }
695  }
696  }
697  }
698  }
699 
700  // 2D patch: ColMajor
701  patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3}};
702  Tensor<DataType, 4, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
703  patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
704  DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
705  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
706  gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
707  sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
708 
709  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
710  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
711  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
712  VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
713 
714  // 2D patch: RowMajor
715  patchRowMajorTensorRange={{sizeDim2*sizeDim3, 2, 2, sizeDim1}};
716  Tensor<DataType, 4, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
717  patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
718  DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
719  TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
720  gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
721  sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
722  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5);
723  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2);
724  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
725  VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
726 
727  // Based on the calculation described in TensorTraits.h, padding happens to be 0.
728  IndexType row_padding = 0;
729  IndexType col_padding = 0;
730  IndexType stride = 1;
731 
732  for (IndexType i = 0; i < 3; ++i) {
733  for (IndexType j = 0; j < 5; ++j) {
734  IndexType patchId = i+3*j;
735  for (IndexType r = 0; r < 2; ++r) {
736  for (IndexType c = 0; c < 2; ++c) {
737  for (IndexType d = 0; d < 2; ++d) {
738  DataType expected_col_major = 0.0f;
739  DataType expected_row_major = 0.0f;
740  IndexType row_offset = r*stride + i - row_padding;
741  IndexType col_offset = c*stride + j - col_padding;
742  // ColMajor
743  if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
744  expected_col_major = tensor_col_major(d, row_offset, col_offset);
745  }
746  if (twod_patch_col_major(d, r, c, patchId) != expected_col_major) {
747  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
748  }
749  VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId), expected_col_major);
750  // RowMajor
751  if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
752  expected_row_major = tensor_row_major(col_offset, row_offset, d);
753  }
754  if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
755  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
756  }
757  VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
758  // Check that ColMajor and RowMajor agree.
759  VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
760  }
761  }
762  }
763  }
764  }
765 
766  sycl_device.deallocate(gpu_data_col_major);
767  sycl_device.deallocate(gpu_data_row_major);
768  sycl_device.deallocate(gpu_data_single_patch_col_major);
769  sycl_device.deallocate(gpu_data_single_patch_row_major);
770  sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
771  sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
772  sycl_device.deallocate(gpu_data_twod_patch_col_major);
773  sycl_device.deallocate(gpu_data_twod_patch_row_major);
774 }
775 
776 template <typename DataType, typename IndexType>
777 static void test_imagenet_patches_sycl(const Eigen::SyclDevice& sycl_device)
778 {
779  // Test the code on typical configurations used by the 'imagenet' benchmarks at
780  // https://github.com/soumith/convnet-benchmarks
781  // ColMajor
782  IndexType sizeDim1 = 3;
783  IndexType sizeDim2 = 128;
784  IndexType sizeDim3 = 128;
785  IndexType sizeDim4 = 16;
786  array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
787  Tensor<DataType, 4, DataLayout,IndexType> l_in_col_major(tensorColMajorRange);
788  l_in_col_major.setRandom();
789 
790  DataType* gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
791  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major, tensorColMajorRange);
792 
793  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
794 
795  array<IndexType, 5> patchTensorRange={{sizeDim1, 11, 11, sizeDim2*sizeDim3, sizeDim4}};
796  Tensor<DataType, 5, DataLayout,IndexType> l_out_col_major(patchTensorRange);
797  size_t patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
798  DataType* gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
799  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major, patchTensorRange);
800  gpu_l_out_col_major.device(sycl_device)=gpu_l_in_col_major.extract_image_patches(11, 11);
801  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
802 
803  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), sizeDim1);
804  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 11);
805  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 11);
806  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), sizeDim2*sizeDim3);
807  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), sizeDim4);
808 
809  // RowMajor
810  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 11, 11, sizeDim1}};
811  Tensor<DataType, 5, RowMajor,IndexType> l_out_row_major(patchTensorRange);
812  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
813  DataType* gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
814  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major, patchTensorRange);
815  gpu_l_out_row_major.device(sycl_device)=gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11);
816  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
817 
818  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), sizeDim4);
819  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), sizeDim2*sizeDim3);
820  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
821  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11);
822  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), sizeDim1);
823 
824  for (IndexType b = 0; b < 16; ++b) {
825  for (IndexType i = 0; i < 128; ++i) {
826  for (IndexType j = 0; j < 128; ++j) {
827  IndexType patchId = i+128*j;
828  for (IndexType c = 0; c < 11; ++c) {
829  for (IndexType r = 0; r < 11; ++r) {
830  for (IndexType d = 0; d < 3; ++d) {
831  DataType expected = 0.0f;
832  if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
833  expected = l_in_col_major(d, r-5+i, c-5+j, b);
834  }
835  // ColMajor
836  if (l_out_col_major(d, r, c, patchId, b) != expected) {
837  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
838  }
839  VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
840  // RowMajor
841  if (l_out_row_major(b, patchId, c, r, d) !=
842  expected) {
843  std::cout << "Mismatch detected at index i=" << i << " j=" << j
844  << " r=" << r << " c=" << c << " d=" << d << " b=" << b
845  << std::endl;
846  }
847  VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
848  expected);
849  }
850  }
851  }
852  }
853  }
854  }
855 
856  // ColMajor
857  sycl_device.deallocate(gpu_data_l_in_col_major);
858  sycl_device.deallocate(gpu_data_l_out_col_major);
859  sizeDim1 = 16;
860  sizeDim2 = 64;
861  sizeDim3 = 64;
862  sizeDim4 = 32;
863  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
864  l_in_col_major.resize(tensorColMajorRange);
865  l_in_col_major.setRandom();
866  gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
867  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize1(gpu_data_l_in_col_major, tensorColMajorRange);
868 
869  patchTensorRange={{sizeDim1, 9, 9, sizeDim2*sizeDim3, sizeDim4}};
870  l_out_col_major.resize(patchTensorRange);
871  patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
872  gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
873  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize1(gpu_data_l_out_col_major, patchTensorRange);
874  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
875  gpu_l_out_col_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.extract_image_patches(9, 9);
876  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
877  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 16);
878  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 9);
879  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 9);
880  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 64*64);
881  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
882 
883 // RowMajor
884  sycl_device.deallocate(gpu_data_l_out_row_major);
885  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 9, 9 ,sizeDim1}};
886  l_out_row_major.resize(patchTensorRange);
887  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
888  gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
889  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize1(gpu_data_l_out_row_major, patchTensorRange);
890  gpu_l_out_row_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9);
891  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
892 
893  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
894  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
895  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
896  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9);
897  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16);
898 
899  for (IndexType b = 0; b < 32; ++b) {
900  for (IndexType i = 0; i < 64; ++i) {
901  for (IndexType j = 0; j < 64; ++j) {
902  IndexType patchId = i+64*j;
903  for (IndexType c = 0; c < 9; ++c) {
904  for (IndexType r = 0; r < 9; ++r) {
905  for (IndexType d = 0; d < 16; ++d) {
906  DataType expected = 0.0f;
907  if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
908  expected = l_in_col_major(d, r-4+i, c-4+j, b);
909  }
910  // ColMajor
911  if (l_out_col_major(d, r, c, patchId, b) != expected) {
912  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
913  }
914  VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
915  // RowMajor
916  if (l_out_row_major(b, patchId, c, r, d) != expected) {
917  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
918  }
919  VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
920  }
921  }
922  }
923  }
924  }
925  }
926 
927  // ColMajor
928 
929  sycl_device.deallocate(gpu_data_l_in_col_major);
930  sycl_device.deallocate(gpu_data_l_out_col_major);
931  sizeDim1 = 32;
932  sizeDim2 = 16;
933  sizeDim3 = 16;
934  sizeDim4 = 32;
935  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
936  l_in_col_major.resize(tensorColMajorRange);
937  l_in_col_major.setRandom();
938  gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
939  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize2(gpu_data_l_in_col_major, tensorColMajorRange);
940 
941  patchTensorRange={{sizeDim1, 7, 7, sizeDim2*sizeDim3, sizeDim4}};
942  l_out_col_major.resize(patchTensorRange);
943  patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
944  gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
945  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize2(gpu_data_l_out_col_major, patchTensorRange);
946  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
947  gpu_l_out_col_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.extract_image_patches(7, 7);
948  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
949 
950  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 32);
951  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 7);
952  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 7);
953  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 16*16);
954  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
955 
956  // RowMajor
957  sycl_device.deallocate(gpu_data_l_out_row_major);
958  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 7, 7 ,sizeDim1}};
959  l_out_row_major.resize(patchTensorRange);
960  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
961  gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
962  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize2(gpu_data_l_out_row_major, patchTensorRange);
963  gpu_l_out_row_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7);
964  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
965 
966  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
967  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
968  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
969  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7);
970  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32);
971 
972  for (IndexType b = 0; b < 32; ++b) {
973  for (IndexType i = 0; i < 16; ++i) {
974  for (IndexType j = 0; j < 16; ++j) {
975  IndexType patchId = i+16*j;
976  for (IndexType c = 0; c < 7; ++c) {
977  for (IndexType r = 0; r < 7; ++r) {
978  for (IndexType d = 0; d < 32; ++d) {
979  DataType expected = 0.0f;
980  if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
981  expected = l_in_col_major(d, r-3+i, c-3+j, b);
982  }
983  // ColMajor
984  if (l_out_col_major(d, r, c, patchId, b) != expected) {
985  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
986  }
987  VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
988  // RowMajor
989  if (l_out_row_major(b, patchId, c, r, d) != expected) {
990  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
991  }
992  VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
993  }
994  }
995  }
996  }
997  }
998  }
999 
1000  // ColMajor
1001  sycl_device.deallocate(gpu_data_l_in_col_major);
1002  sycl_device.deallocate(gpu_data_l_out_col_major);
1003  sizeDim1 = 64;
1004  sizeDim2 = 13;
1005  sizeDim3 = 13;
1006  sizeDim4 = 32;
1007  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
1008  l_in_col_major.resize(tensorColMajorRange);
1009  l_in_col_major.setRandom();
1010  gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
1011  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize3(gpu_data_l_in_col_major, tensorColMajorRange);
1012 
1013  patchTensorRange={{sizeDim1, 3, 3, sizeDim2*sizeDim3, sizeDim4}};
1014  l_out_col_major.resize(patchTensorRange);
1015  patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
1016  gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
1017  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize3(gpu_data_l_out_col_major, patchTensorRange);
1018  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
1019  gpu_l_out_col_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.extract_image_patches(3, 3);
1020  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
1021 
1022  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 64);
1023  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 3);
1024  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 3);
1025  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 13*13);
1026  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
1027 
1028  // RowMajor
1029  sycl_device.deallocate(gpu_data_l_out_row_major);
1030  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 3, 3 ,sizeDim1}};
1031  l_out_row_major.resize(patchTensorRange);
1032  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
1033  gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
1034  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize3(gpu_data_l_out_row_major, patchTensorRange);
1035  gpu_l_out_row_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3);
1036  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
1037 
1038  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
1039  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
1040  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
1041  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3);
1042  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64);
1043 
1044  for (IndexType b = 0; b < 32; ++b) {
1045  for (IndexType i = 0; i < 13; ++i) {
1046  for (IndexType j = 0; j < 13; ++j) {
1047  IndexType patchId = i+13*j;
1048  for (IndexType c = 0; c < 3; ++c) {
1049  for (IndexType r = 0; r < 3; ++r) {
1050  for (IndexType d = 0; d < 64; ++d) {
1051  DataType expected = 0.0f;
1052  if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
1053  expected = l_in_col_major(d, r-1+i, c-1+j, b);
1054  }
1055  // ColMajor
1056  if (l_out_col_major(d, r, c, patchId, b) != expected) {
1057  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
1058  }
1059  VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
1060  // RowMajor
1061  if (l_out_row_major(b, patchId, c, r, d) != expected) {
1062  std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
1063  }
1064  VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
1065  }
1066  }
1067  }
1068  }
1069  }
1070  }
1071  sycl_device.deallocate(gpu_data_l_in_col_major);
1072  sycl_device.deallocate(gpu_data_l_out_col_major);
1073  sycl_device.deallocate(gpu_data_l_out_row_major);
1074 }
1075 
1076 
1077 template<typename DataType, typename dev_Selector> void sycl_tensor_image_patch_test_per_device(dev_Selector s){
1078 QueueInterface queueInterface(s);
1079 auto sycl_device = Eigen::SyclDevice(&queueInterface);
1080 test_simple_image_patch_sycl<DataType, int64_t>(sycl_device);
1081 test_patch_padding_valid_sycl<DataType, int64_t>(sycl_device);
1082 test_patch_padding_valid_same_value_sycl<DataType, int64_t>(sycl_device);
1083 test_patch_padding_same_sycl<DataType, int64_t>(sycl_device);
1084 test_patch_no_extra_dim_sycl<DataType, int64_t>(sycl_device);
1085 test_imagenet_patches_sycl<DataType, int64_t>(sycl_device);
1086 }
1087 EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl)
1088 {
1089 for (const auto& device :Eigen::get_sycl_supported_devices()) {
1090  CALL_SUBTEST(sycl_tensor_image_patch_test_per_device<float>(device));
1091 }
1092 }
Eigen::Tensor::dimension
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const
Definition: Tensor.h:101
Eigen::Tensor
The tensor class.
Definition: Tensor.h:63
s
RealScalar s
Definition: level1_cplx_impl.h:126
d
static const double d[K][N]
Definition: igam.h:11
Eigen::array
Definition: EmulateArray.h:21
VERIFY_IS_EQUAL
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:386
c
Scalar Scalar * c
Definition: benchVecAdd.cpp:17
b
Scalar * b
Definition: benchVecAdd.cpp:17
DataLayout
static const int DataLayout
Definition: cxx11_tensor_image_patch_sycl.cpp:24
test_simple_image_patch_sycl
static void test_simple_image_patch_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_image_patch_sycl.cpp:27
Eigen::PADDING_SAME
@ PADDING_SAME
Definition: TensorTraits.h:259
test_imagenet_patches_sycl
static void test_imagenet_patches_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_image_patch_sycl.cpp:777
EIGEN_DECLARE_TEST
EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl)
Definition: cxx11_tensor_image_patch_sycl.cpp:1087
test_patch_padding_valid_same_value_sycl
static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_image_patch_sycl.cpp:365
test_patch_padding_valid_sycl
static void test_patch_padding_valid_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_image_patch_sycl.cpp:259
j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
Eigen::Tensor::resize
EIGEN_DEVICE_FUNC void resize(const array< Index, NumIndices > &dimensions)
Definition: Tensor.h:447
Eigen::PADDING_VALID
@ PADDING_VALID
Definition: TensorTraits.h:258
sycl_tensor_image_patch_test_per_device
void sycl_tensor_image_patch_test_per_device(dev_Selector s)
Definition: cxx11_tensor_image_patch_sycl.cpp:1077
cholesky::expected
Matrix expected
Definition: testMatrix.cpp:971
Eigen::TensorMap
A tensor expression mapping an existing array of data.
Definition: TensorForwardDeclarations.h:52
tree::f
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Definition: testExpression.cpp:218
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::setRandom
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
Definition: TensorBase.h:996
main.h
Eigen::Tensor::data
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:104
test_patch_padding_same_sycl
static void test_patch_padding_same_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_image_patch_sycl.cpp:462
Eigen::Tensor::size
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:103
test_patch_no_extra_dim_sycl
static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_image_patch_sycl.cpp:567
Eigen::ColMajor
@ ColMajor
Definition: Constants.h:319
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
CALL_SUBTEST
#define CALL_SUBTEST(FUNC)
Definition: main.h:399


gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:01:23