cxx11_tensor_chipping_sycl.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 // Benoit Steiner <benoit.steiner.goog@gmail.com>
10 //
11 // This Source Code Form is subject to the terms of the Mozilla
12 // Public License v. 2.0. If a copy of the MPL was not distributed
13 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
14 
15 
16 #define EIGEN_TEST_NO_LONGDOUBLE
17 #define EIGEN_TEST_NO_COMPLEX
18 
19 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
20 #define EIGEN_USE_SYCL
21 
22 #include "main.h"
23 
24 #include <Eigen/CXX11/Tensor>
25 
26 using Eigen::Tensor;
27 
28 template <typename DataType, int DataLayout, typename IndexType>
29 static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
30 {
31  IndexType sizeDim1 = 2;
32  IndexType sizeDim2 = 3;
33  IndexType sizeDim3 = 5;
34  IndexType sizeDim4 = 7;
35  IndexType sizeDim5 = 11;
36 
37  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
38  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
39 
41  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
42 
43  tensor.setRandom();
44 
45  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
46  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
47  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
48  DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
49 
50  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
51  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
52 
53  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
54  gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
55  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
56 
57  VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
58  VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
59  VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
60  VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
61 
62  for (IndexType i = 0; i < sizeDim2; ++i) {
63  for (IndexType j = 0; j < sizeDim3; ++j) {
64  for (IndexType k = 0; k < sizeDim4; ++k) {
65  for (IndexType l = 0; l < sizeDim5; ++l) {
66  VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
67  }
68  }
69  }
70  }
71 
72  array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
73  Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
74  const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
75  DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
76  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
77 
78  gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
79  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
80 
81  VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
82  VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
83  VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
84  VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
85 
86  for (IndexType i = 0; i < sizeDim1; ++i) {
87  for (IndexType j = 0; j < sizeDim3; ++j) {
88  for (IndexType k = 0; k < sizeDim4; ++k) {
89  for (IndexType l = 0; l < sizeDim5; ++l) {
90  VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
91  }
92  }
93  }
94  }
95 
96  array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
97  Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
98  const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
99  DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
100  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
101 
102  gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
103  sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
104 
105  VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
106  VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
107  VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
108  VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
109 
110  for (IndexType i = 0; i < sizeDim1; ++i) {
111  for (IndexType j = 0; j < sizeDim2; ++j) {
112  for (IndexType k = 0; k < sizeDim4; ++k) {
113  for (IndexType l = 0; l < sizeDim5; ++l) {
114  VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
115  }
116  }
117  }
118  }
119 
120  array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
121  Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
122  const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
123  DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
124  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
125 
126  gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
127  sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
128 
129  VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
130  VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
131  VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
132  VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
133 
134  for (IndexType i = 0; i < sizeDim1; ++i) {
135  for (IndexType j = 0; j < sizeDim2; ++j) {
136  for (IndexType k = 0; k < sizeDim3; ++k) {
137  for (IndexType l = 0; l < sizeDim5; ++l) {
138  VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
139  }
140  }
141  }
142  }
143 
144 
145  array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
146  Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
147  const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
148  DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
149  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
150 
151  gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
152  sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
153 
154  VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
155  VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
156  VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
157  VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
158 
159  for (IndexType i = 0; i < sizeDim1; ++i) {
160  for (IndexType j = 0; j < sizeDim2; ++j) {
161  for (IndexType k = 0; k < sizeDim3; ++k) {
162  for (IndexType l = 0; l < sizeDim4; ++l) {
163  VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
164  }
165  }
166  }
167  }
168 
169  sycl_device.deallocate(gpu_data_tensor);
170  sycl_device.deallocate(gpu_data_chip1);
171  sycl_device.deallocate(gpu_data_chip2);
172  sycl_device.deallocate(gpu_data_chip3);
173  sycl_device.deallocate(gpu_data_chip4);
174  sycl_device.deallocate(gpu_data_chip5);
175 }
176 
177 template <typename DataType, int DataLayout, typename IndexType>
178 static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
179 {
180  IndexType sizeDim1 = 2;
181  IndexType sizeDim2 = 3;
182  IndexType sizeDim3 = 5;
183  IndexType sizeDim4 = 7;
184  IndexType sizeDim5 = 11;
185 
186  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
187  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
188 
189  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
190  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
191 
192  tensor.setRandom();
193 
194  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
195  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
196  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
197  DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
198 
199  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
200  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
201 
202  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
203  gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
204  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
205 
206  VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
207  VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
208  VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
209  VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
210 
211  for (IndexType i = 0; i < sizeDim2; ++i) {
212  for (IndexType j = 0; j < sizeDim3; ++j) {
213  for (IndexType k = 0; k < sizeDim4; ++k) {
214  for (IndexType l = 0; l < sizeDim5; ++l) {
215  VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
216  }
217  }
218  }
219  }
220 
221  array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
222  Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
223  const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
224  DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
225  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
226 
227  gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
228  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
229 
230  VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
231  VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
232  VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
233  VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
234 
235  for (IndexType i = 0; i < sizeDim1; ++i) {
236  for (IndexType j = 0; j < sizeDim3; ++j) {
237  for (IndexType k = 0; k < sizeDim4; ++k) {
238  for (IndexType l = 0; l < sizeDim5; ++l) {
239  VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
240  }
241  }
242  }
243  }
244 
245  array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
246  Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
247  const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
248  DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
249  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
250 
251  gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
252  sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
253 
254  VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
255  VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
256  VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
257  VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
258 
259  for (IndexType i = 0; i < sizeDim1; ++i) {
260  for (IndexType j = 0; j < sizeDim2; ++j) {
261  for (IndexType k = 0; k < sizeDim4; ++k) {
262  for (IndexType l = 0; l < sizeDim5; ++l) {
263  VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
264  }
265  }
266  }
267  }
268 
269  array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
270  Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
271  const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
272  DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
273  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
274 
275  gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
276  sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
277 
278  VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
279  VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
280  VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
281  VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
282 
283  for (IndexType i = 0; i < sizeDim1; ++i) {
284  for (IndexType j = 0; j < sizeDim2; ++j) {
285  for (IndexType k = 0; k < sizeDim3; ++k) {
286  for (IndexType l = 0; l < sizeDim5; ++l) {
287  VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
288  }
289  }
290  }
291  }
292 
293 
294  array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
295  Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
296  const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
297  DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
298  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
299 
300  gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
301  sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
302 
303  VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
304  VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
305  VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
306  VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
307 
308  for (IndexType i = 0; i < sizeDim1; ++i) {
309  for (IndexType j = 0; j < sizeDim2; ++j) {
310  for (IndexType k = 0; k < sizeDim3; ++k) {
311  for (IndexType l = 0; l < sizeDim4; ++l) {
312  VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
313  }
314  }
315  }
316  }
317  sycl_device.deallocate(gpu_data_tensor);
318  sycl_device.deallocate(gpu_data_chip1);
319  sycl_device.deallocate(gpu_data_chip2);
320  sycl_device.deallocate(gpu_data_chip3);
321  sycl_device.deallocate(gpu_data_chip4);
322  sycl_device.deallocate(gpu_data_chip5);
323 }
324 
325 template <typename DataType, int DataLayout, typename IndexType>
326 static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
327 
328  IndexType sizeDim1 = 2;
329  IndexType sizeDim2 = 3;
330  IndexType sizeDim3 = 5;
331  IndexType sizeDim4 = 7;
332  IndexType sizeDim5 = 11;
333 
334  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
335  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
336 
337  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
338 
339  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
340  Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
341  tensor.setRandom();
342  tensor1.setRandom();
343 
344  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
345  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
346  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
347  DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
348  DataType* gpu_data_tensor1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
349 
350  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
351  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
352  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
353 
354 
355  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
356  sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
357  gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
358  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
359 
360  for (int i = 0; i < sizeDim2; ++i) {
361  for (int j = 0; j < sizeDim3; ++j) {
362  for (int k = 0; k < sizeDim4; ++k) {
363  for (int l = 0; l < sizeDim5; ++l) {
364  float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
365  VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
366  }
367  }
368  }
369  }
370 
371  array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
372  Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
373  Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
374  tensor2.setRandom();
375  const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
376  DataType* gpu_data_tensor2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
377  DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
378  TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
379  TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
380 
381  sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
382  gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
383  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
384 
385  for (int i = 0; i < sizeDim2; ++i) {
386  for (int j = 0; j < sizeDim4; ++j) {
387  for (int k = 0; k < sizeDim5; ++k) {
388  float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
389  VERIFY_IS_EQUAL(chip2(i,j,k), expected);
390  }
391  }
392  }
393  sycl_device.deallocate(gpu_data_tensor);
394  sycl_device.deallocate(gpu_data_tensor1);
395  sycl_device.deallocate(gpu_data_chip1);
396  sycl_device.deallocate(gpu_data_tensor2);
397  sycl_device.deallocate(gpu_data_chip2);
398 }
399 
400 template <typename DataType, int DataLayout, typename IndexType>
401 static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
402 {
403 
404  IndexType sizeDim1 = 2;
405  IndexType sizeDim2 = 3;
406  IndexType sizeDim3 = 5;
407  IndexType sizeDim4 = 7;
408  IndexType sizeDim5 = 11;
409 
410  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
411  array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
412 
413  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
414  Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
415  Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
416  input1.setRandom();
417  input2.setRandom();
418 
419 
420  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
421  const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
422  std::cout << tensorBuffSize << " , "<< input2TensorBuffSize << std::endl;
423  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
424  DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
425  DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
426 
427  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
428  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
429  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
430 
431  sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
432  gpu_tensor.device(sycl_device)=gpu_input1;
433  sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
434  gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
435  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
436 
437  for (int i = 0; i < sizeDim1; ++i) {
438  for (int j = 0; j < sizeDim2; ++j) {
439  for (int k = 0; k < sizeDim3; ++k) {
440  for (int l = 0; l < sizeDim4; ++l) {
441  for (int m = 0; m < sizeDim5; ++m) {
442  if (i != 1) {
443  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
444  } else {
445  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
446  }
447  }
448  }
449  }
450  }
451  }
452 
453  gpu_tensor.device(sycl_device)=gpu_input1;
454  array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
455  Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
456  input3.setRandom();
457 
458  const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
459  DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
460  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
461 
462  sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
463  gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
464  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
465 
466  for (int i = 0; i < sizeDim1; ++i) {
467  for (int j = 0; j < sizeDim2; ++j) {
468  for (int k = 0; k <sizeDim3; ++k) {
469  for (int l = 0; l < sizeDim4; ++l) {
470  for (int m = 0; m < sizeDim5; ++m) {
471  if (j != 1) {
472  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
473  } else {
474  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
475  }
476  }
477  }
478  }
479  }
480  }
481 
482  gpu_tensor.device(sycl_device)=gpu_input1;
483  array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
484  Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
485  input4.setRandom();
486 
487  const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
488  DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
489  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
490 
491  sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
492  gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
493  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
494 
495  for (int i = 0; i < sizeDim1; ++i) {
496  for (int j = 0; j < sizeDim2; ++j) {
497  for (int k = 0; k <sizeDim3; ++k) {
498  for (int l = 0; l < sizeDim4; ++l) {
499  for (int m = 0; m < sizeDim5; ++m) {
500  if (k != 3) {
501  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
502  } else {
503  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
504  }
505  }
506  }
507  }
508  }
509  }
510 
511  gpu_tensor.device(sycl_device)=gpu_input1;
512  array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
513  Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
514  input5.setRandom();
515 
516  const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
517  DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
518  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
519 
520  sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
521  gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
522  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
523 
524  for (int i = 0; i < sizeDim1; ++i) {
525  for (int j = 0; j < sizeDim2; ++j) {
526  for (int k = 0; k <sizeDim3; ++k) {
527  for (int l = 0; l < sizeDim4; ++l) {
528  for (int m = 0; m < sizeDim5; ++m) {
529  if (l != 4) {
530  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
531  } else {
532  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
533  }
534  }
535  }
536  }
537  }
538  }
539  gpu_tensor.device(sycl_device)=gpu_input1;
540  array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
541  Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
542  input6.setRandom();
543 
544  const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
545  DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
546  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
547 
548  sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
549  gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
550  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
551 
552  for (int i = 0; i < sizeDim1; ++i) {
553  for (int j = 0; j < sizeDim2; ++j) {
554  for (int k = 0; k <sizeDim3; ++k) {
555  for (int l = 0; l < sizeDim4; ++l) {
556  for (int m = 0; m < sizeDim5; ++m) {
557  if (m != 5) {
558  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
559  } else {
560  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
561  }
562  }
563  }
564  }
565  }
566  }
567 
568 
569  gpu_tensor.device(sycl_device)=gpu_input1;
570  Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
571  input7.setRandom();
572 
573  DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
574  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
575 
576  sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
577  gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
578  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
579 
580  for (int i = 0; i < sizeDim1; ++i) {
581  for (int j = 0; j < sizeDim2; ++j) {
582  for (int k = 0; k <sizeDim3; ++k) {
583  for (int l = 0; l < sizeDim4; ++l) {
584  for (int m = 0; m < sizeDim5; ++m) {
585  if (i != 0) {
586  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
587  } else {
588  VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
589  }
590  }
591  }
592  }
593  }
594  }
595  sycl_device.deallocate(gpu_data_tensor);
596  sycl_device.deallocate(gpu_data_input1);
597  sycl_device.deallocate(gpu_data_input2);
598  sycl_device.deallocate(gpu_data_input3);
599  sycl_device.deallocate(gpu_data_input4);
600  sycl_device.deallocate(gpu_data_input5);
601  sycl_device.deallocate(gpu_data_input6);
602  sycl_device.deallocate(gpu_data_input7);
603 
604 }
605 
606 template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
607  QueueInterface queueInterface(s);
608  auto sycl_device = Eigen::SyclDevice(&queueInterface);
609  /* test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
610  test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
611  test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
612  test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
613  test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
614  test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);*/
615  test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
616  // test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
617 }
618 EIGEN_DECLARE_TEST(cxx11_tensor_chipping_sycl)
619 {
620  for (const auto& device :Eigen::get_sycl_supported_devices()) {
621  CALL_SUBTEST(sycl_chipping_test_per_device<float>(device));
622  }
623 }
Eigen::Tensor::dimension
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const
Definition: Tensor.h:101
Eigen::Tensor
The tensor class.
Definition: Tensor.h:63
test_chip_in_expr
static void test_chip_in_expr(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_chipping_sycl.cpp:326
test_chip_as_lvalue_sycl
static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_chipping_sycl.cpp:401
test_dynamic_chip_sycl
static void test_dynamic_chip_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_chipping_sycl.cpp:178
s
RealScalar s
Definition: level1_cplx_impl.h:126
Eigen::array
Definition: EmulateArray.h:21
VERIFY_IS_EQUAL
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:386
test_static_chip_sycl
static void test_static_chip_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_chipping_sycl.cpp:29
sycl_chipping_test_per_device
void sycl_chipping_test_per_device(dev_Selector s)
Definition: cxx11_tensor_chipping_sycl.cpp:606
j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
l
static const Line3 l(Rot3(), 1, 1)
cholesky::expected
Matrix expected
Definition: testMatrix.cpp:971
Eigen::TensorMap
A tensor expression mapping an existing array of data.
Definition: TensorForwardDeclarations.h:52
m
Matrix3f m
Definition: AngleAxis_mimic_euler.cpp:1
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::setRandom
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
Definition: TensorBase.h:996
main.h
EIGEN_DECLARE_TEST
EIGEN_DECLARE_TEST(cxx11_tensor_chipping_sycl)
Definition: cxx11_tensor_chipping_sycl.cpp:618
Eigen::Tensor::data
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:104
Eigen::Tensor::size
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:103
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
CALL_SUBTEST
#define CALL_SUBTEST(FUNC)
Definition: main.h:399


gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:01:23