// cxx11_tensor_builtins_sycl.cpp
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
28 // Functions used to compare the TensorMap implementation on the device with
29 // the equivalent on the host
30 namespace cl {
31 namespace sycl {
32 template <typename T> T abs(T x) { return cl::sycl::fabs(x); }
33 template <typename T> T square(T x) { return x * x; }
34 template <typename T> T cube(T x) { return x * x * x; }
35 template <typename T> T inverse(T x) { return T(1) / x; }
36 template <typename T> T cwiseMax(T x, T y) { return cl::sycl::max(x, y); }
37 template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
38 }
39 }
40 
42  template <typename Lhs, typename Rhs>
43  void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
44 };
45 
47  template <typename Lhs, typename Rhs>
48  void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
49 };
50 
51 template <typename DataType, int DataLayout,
52  typename Assignement, typename Operator>
53 void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
54  const array<int64_t, 3>& tensor_range) {
55  Operator op;
56  Assignement asgn;
57  {
58  /* Assignement(out, Operator(in)) */
61  in = in.random() + DataType(0.01);
62  out = out.random() + DataType(0.01);
64  DataType *gpu_data = static_cast<DataType *>(
65  sycl_device.allocate(in.size() * sizeof(DataType)));
66  DataType *gpu_data_out = static_cast<DataType *>(
67  sycl_device.allocate(out.size() * sizeof(DataType)));
69  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
70  sycl_device.memcpyHostToDevice(gpu_data, in.data(),
71  (in.size()) * sizeof(DataType));
72  sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
73  (out.size()) * sizeof(DataType));
74  auto device_expr = gpu_out.device(sycl_device);
75  asgn(device_expr, op(gpu));
76  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
77  (out.size()) * sizeof(DataType));
78  for (int64_t i = 0; i < out.size(); ++i) {
79  DataType ver = reference(i);
80  asgn(ver, op(in(i)));
81  VERIFY_IS_APPROX(out(i), ver);
82  }
83  sycl_device.deallocate(gpu_data);
84  sycl_device.deallocate(gpu_data_out);
85  }
86  {
87  /* Assignement(out, Operator(out)) */
89  out = out.random() + DataType(0.01);
91  DataType *gpu_data_out = static_cast<DataType *>(
92  sycl_device.allocate(out.size() * sizeof(DataType)));
93  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
94  sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
95  (out.size()) * sizeof(DataType));
96  auto device_expr = gpu_out.device(sycl_device);
97  asgn(device_expr, op(gpu_out));
98  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
99  (out.size()) * sizeof(DataType));
100  for (int64_t i = 0; i < out.size(); ++i) {
101  DataType ver = reference(i);
102  asgn(ver, op(reference(i)));
103  VERIFY_IS_APPROX(out(i), ver);
104  }
105  sycl_device.deallocate(gpu_data_out);
106  }
107 }
108 
109 #define DECLARE_UNARY_STRUCT(FUNC) \
110  struct op_##FUNC { \
111  template <typename T> \
112  auto operator()(const T& x) -> decltype(cl::sycl::FUNC(x)) { \
113  return cl::sycl::FUNC(x); \
114  } \
115  template <typename T> \
116  auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \
117  return x.FUNC(); \
118  } \
119  };
120 
139 
140 template <typename DataType, int DataLayout, typename Assignement>
141 void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
142  const array<int64_t, 3>& tensor_range) {
143 #define RUN_UNARY_TEST(FUNC) \
144  test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
145  op_##FUNC>(sycl_device, tensor_range)
161 }
162 
163 template <typename DataType, int DataLayout, typename Operator>
164 void test_unary_builtins_return_bool(const Eigen::SyclDevice& sycl_device,
165  const array<int64_t, 3>& tensor_range) {
166  /* out = op(in) */
167  Operator op;
170  in = in.random() + DataType(0.01);
171  DataType *gpu_data = static_cast<DataType *>(
172  sycl_device.allocate(in.size() * sizeof(DataType)));
173  bool *gpu_data_out =
174  static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool)));
176  TensorMap<Tensor<bool, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
177  sycl_device.memcpyHostToDevice(gpu_data, in.data(),
178  (in.size()) * sizeof(DataType));
179  gpu_out.device(sycl_device) = op(gpu);
180  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
181  (out.size()) * sizeof(bool));
182  for (int64_t i = 0; i < out.size(); ++i) {
183  VERIFY_IS_EQUAL(out(i), op(in(i)));
184  }
185  sycl_device.deallocate(gpu_data);
186  sycl_device.deallocate(gpu_data_out);
187 }
188 
189 template <typename DataType, int DataLayout>
190 void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
191  const array<int64_t, 3>& tensor_range) {
193  PlusEqualAssignement>(sycl_device, tensor_range);
195  EqualAssignement>(sycl_device, tensor_range);
197  op_isnan>(sycl_device, tensor_range);
199  op_isfinite>(sycl_device, tensor_range);
201  op_isinf>(sycl_device, tensor_range);
202 }
203 
204 template <typename DataType>
205 static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
206  int64_t sizeDim1 = 10;
207  int64_t sizeDim2 = 10;
208  int64_t sizeDim3 = 10;
209  array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
210 
211  test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
212  test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
213 }
214 
215 template <typename DataType, int DataLayout, typename Operator>
216 void test_binary_builtins_func(const Eigen::SyclDevice& sycl_device,
217  const array<int64_t, 3>& tensor_range) {
218  /* out = op(in_1, in_2) */
219  Operator op;
220  Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
221  Tensor<DataType, 3, DataLayout, int64_t> in_2(tensor_range);
223  in_1 = in_1.random() + DataType(0.01);
224  in_2 = in_2.random() + DataType(0.01);
226  DataType *gpu_data_1 = static_cast<DataType *>(
227  sycl_device.allocate(in_1.size() * sizeof(DataType)));
228  DataType *gpu_data_2 = static_cast<DataType *>(
229  sycl_device.allocate(in_2.size() * sizeof(DataType)));
230  DataType *gpu_data_out = static_cast<DataType *>(
231  sycl_device.allocate(out.size() * sizeof(DataType)));
232  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
233  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_2(gpu_data_2, tensor_range);
234  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
235  sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
236  (in_1.size()) * sizeof(DataType));
237  sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),
238  (in_2.size()) * sizeof(DataType));
239  gpu_out.device(sycl_device) = op(gpu_1, gpu_2);
240  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
241  (out.size()) * sizeof(DataType));
242  for (int64_t i = 0; i < out.size(); ++i) {
243  VERIFY_IS_APPROX(out(i), op(in_1(i), in_2(i)));
244  }
245  sycl_device.deallocate(gpu_data_1);
246  sycl_device.deallocate(gpu_data_2);
247  sycl_device.deallocate(gpu_data_out);
248 }
249 
250 template <typename DataType, int DataLayout, typename Operator>
251 void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice& sycl_device,
252  const array<int64_t, 3>& tensor_range) {
253  /* out = op(in_1, 2) */
254  Operator op;
255  const DataType arg2(2);
256  Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
258  in_1 = in_1.random();
260  DataType *gpu_data_1 = static_cast<DataType *>(
261  sycl_device.allocate(in_1.size() * sizeof(DataType)));
262  DataType *gpu_data_out = static_cast<DataType *>(
263  sycl_device.allocate(out.size() * sizeof(DataType)));
264  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
265  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
266  sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
267  (in_1.size()) * sizeof(DataType));
268  gpu_out.device(sycl_device) = op(gpu_1, arg2);
269  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
270  (out.size()) * sizeof(DataType));
271  for (int64_t i = 0; i < out.size(); ++i) {
272  VERIFY_IS_APPROX(out(i), op(in_1(i), arg2));
273  }
274  sycl_device.deallocate(gpu_data_1);
275  sycl_device.deallocate(gpu_data_out);
276 }
277 
278 #define DECLARE_BINARY_STRUCT(FUNC) \
279  struct op_##FUNC { \
280  template <typename T1, typename T2> \
281  auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) { \
282  return cl::sycl::FUNC(x, y); \
283  } \
284  template <typename T1, typename T2> \
285  auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \
286  return x.FUNC(y); \
287  } \
288  };
289 
292 
293 #define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR) \
294  struct op_##NAME { \
295  template <typename T1, typename T2> \
296  auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \
297  return x OPERATOR y; \
298  } \
299  };
300 
302 DECLARE_BINARY_STRUCT_OP(minus, -)
303 DECLARE_BINARY_STRUCT_OP(times, *)
304 DECLARE_BINARY_STRUCT_OP(divide, /)
305 DECLARE_BINARY_STRUCT_OP(modulo, %)
306 
307 template <typename DataType, int DataLayout>
308 void test_binary_builtins(const Eigen::SyclDevice& sycl_device,
309  const array<int64_t, 3>& tensor_range) {
311  op_cwiseMax>(sycl_device, tensor_range);
313  op_cwiseMin>(sycl_device, tensor_range);
315  op_plus>(sycl_device, tensor_range);
317  op_minus>(sycl_device, tensor_range);
319  op_times>(sycl_device, tensor_range);
321  op_divide>(sycl_device, tensor_range);
322 }
323 
324 template <typename DataType>
325 static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
326  int64_t sizeDim1 = 10;
327  int64_t sizeDim2 = 10;
328  int64_t sizeDim3 = 10;
329  array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
330  test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
331  test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
332 }
333 
334 template <typename DataType>
335 static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
336  int64_t sizeDim1 = 10;
337  int64_t sizeDim2 = 10;
338  int64_t sizeDim3 = 10;
339  array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
341  op_modulo>(sycl_device, tensor_range);
343  op_modulo>(sycl_device, tensor_range);
344 }
345 
346 EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {
347  for (const auto& device :Eigen::get_sycl_supported_devices()) {
348  QueueInterface queueInterface(device);
349  Eigen::SyclDevice sycl_device(&queueInterface);
350  CALL_SUBTEST_1(test_builtin_unary_sycl<float>(sycl_device));
351  CALL_SUBTEST_2(test_floating_builtin_binary_sycl<float>(sycl_device));
352  CALL_SUBTEST_3(test_integer_builtin_binary_sycl<int>(sycl_device));
353  }
354 }
EIGEN_DEVICE_FUNC const Log1pReturnType log1p() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:103
int array[24]
#define max(a, b)
Definition: datatypes.h:20
#define DECLARE_BINARY_STRUCT(FUNC)
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl)
Scalar * y
GpuHelper gpu
Definition: gpuhelper.cpp:18
std::ofstream out("Result.txt")
#define min(a, b)
Definition: datatypes.h:19
#define CALL_SUBTEST_3(FUNC)
static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device)
static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device)
EIGEN_DEVICE_FUNC const TanhReturnType tanh() const
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
void test_unary_builtins_for_scalar(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
void test_binary_builtins_func(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
void operator()(Lhs &lhs, const Rhs &rhs)
#define isfinite(X)
Definition: main.h:95
#define isinf(X)
Definition: main.h:94
Real fabs(const Real &a)
EIGEN_DEVICE_FUNC const LogReturnType log() const
#define DECLARE_UNARY_STRUCT(FUNC)
EIGEN_DEVICE_FUNC const ExpReturnType exp() const
EIGEN_DEVICE_FUNC const FloorReturnType floor() const
void operator()(Lhs &lhs, const Rhs &rhs)
#define VERIFY_IS_APPROX(a, b)
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:386
EIGEN_DEVICE_FUNC const Expm1ReturnType expm1() const
void test_unary_builtins_return_bool(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
#define CALL_SUBTEST_1(FUNC)
A tensor expression mapping an existing array of data.
#define RUN_UNARY_TEST(FUNC)
void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
void test_unary_builtins_for_assignement(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
signed __int64 int64_t
Definition: ms_stdint.h:94
EIGEN_DEVICE_FUNC const SignReturnType sign() const
Eigen::Triplet< double > T
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR)
static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device)
void test_binary_builtins(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
Definition: TensorBase.h:1145
EIGEN_DEVICE_FUNC const RsqrtReturnType rsqrt() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:104
#define CALL_SUBTEST_2(FUNC)
Jet< T, N > sqrt(const Jet< T, N > &f)
Definition: jet.h:418
void test_unary_builtins(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
EIGEN_DEVICE_FUNC const CeilReturnType ceil() const
static const int DataLayout
The tensor class.
Definition: Tensor.h:63
#define isnan(X)
Definition: main.h:93
EIGEN_DEVICE_FUNC const RoundReturnType round() const


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:34:07