#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::SyclDevice;
using Eigen::TensorMap;
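
// The two identical template headers below belong to the element-wise
// assignment functors (their struct bodies are elided in this excerpt);
// instantiations of them are passed to the tests as the Assignement
// parameter. A minimal sketch of what such a functor looks like, with an
// illustrative name not taken from the file:
//
//   struct EqualAssignment {
//     template <typename Lhs, typename Rhs>
//     void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
//   };
//
// plus, presumably, a compound-assignment (+=) variant, so that
// asgn(device_expr, op(gpu)) below evaluates either out = op(in) or
// out += op(in) on the SYCL device.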
  template <typename Lhs, typename Rhs>
  template <typename Lhs, typename Rhs>
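
// test_unary_builtins_for_scalar: for a unary builtin functor Operator and an
// assignment functor Assignement, fill the host tensors with random values
// shifted by 0.01, copy them into raw SYCL device buffers, evaluate the
// assigned expression on the device, copy the result back, and compare it
// against the same computation done on the host (the verification loop is
// elided from this excerpt).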
template <typename DataType, int DataLayout, typename Assignement,
          typename Operator>
  in = in.random() + DataType(0.01);
  out = out.random() + DataType(0.01);
  DataType *gpu_data = static_cast<DataType *>(
      sycl_device.allocate(in.size() * sizeof(DataType)));
  DataType *gpu_data_out = static_cast<DataType *>(
      sycl_device.allocate(out.size() * sizeof(DataType)));
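  // (The TensorMap views gpu and gpu_out, which bind these raw device buffers
  // to tensor expressions with the shape of in/out, are elided in this
  // excerpt.)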
  sycl_device.memcpyHostToDevice(gpu_data, in.data(),
                                 (in.size()) * sizeof(DataType));
  sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
                                 (out.size()) * sizeof(DataType));
  auto device_expr = gpu_out.device(sycl_device);
  asgn(device_expr, op(gpu));
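  // device_expr targets the SYCL device, so the assignment functor turns this
  // into, e.g., gpu_out = op(gpu) or gpu_out += op(gpu), evaluated by the
  // device.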
  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                 (out.size()) * sizeof(DataType));
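  // (The host-side check of out against the scalar overload of op, presumably
  // a VERIFY_IS_APPROX loop, is elided here.)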
  sycl_device.deallocate(gpu_data);
  sycl_device.deallocate(gpu_data_out);
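
  // Second case: the operator is applied to the output tensor itself, i.e.
  // asgn(out, op(out)) evaluated on the device. In the full file this case
  // lives in its own scope, which is why the local names are re-declared.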
  out = out.random() + DataType(0.01);
  DataType *gpu_data_out = static_cast<DataType *>(
      sycl_device.allocate(out.size() * sizeof(DataType)));
  sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
                                 (out.size()) * sizeof(DataType));
  auto device_expr = gpu_out.device(sycl_device);
  asgn(device_expr, op(gpu_out));
  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                 (out.size()) * sizeof(DataType));
  sycl_device.deallocate(gpu_data_out);
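
// DECLARE_UNARY_STRUCT(FUNC) defines a functor op_FUNC with two overloads:
// a scalar overload that forwards to the cl::sycl builtin (used for the
// host-side reference computation) and a TensorMap overload that forwards to
// the Eigen tensor method of the same name (used to build the device-side
// expression).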
#define DECLARE_UNARY_STRUCT(FUNC)                                 \
  struct op_##FUNC {                                               \
    template <typename T>                                          \
    auto operator()(const T& x) -> decltype(cl::sycl::FUNC(x)) {   \
      return cl::sycl::FUNC(x);                                    \
    }                                                              \
    template <typename T>                                          \
    auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \
      return x.FUNC();                                             \
    }                                                              \
  };
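// The original file instantiates this macro once per builtin under test; the
// op_isnan, op_isfinite, and op_isinf functors referenced below are
// presumably produced this way, e.g. DECLARE_UNARY_STRUCT(isnan).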
template <typename DataType, int DataLayout, typename Assignement>
#define RUN_UNARY_TEST(FUNC)                                          \
  test_unary_builtins_for_scalar<DataType, DataLayout, Assignement,   \
                                 op_##FUNC>(sycl_device, tensor_range)
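
// RUN_UNARY_TEST(FUNC) runs the scalar test above for op_FUNC with the
// current DataType, DataLayout, and Assignement; e.g. RUN_UNARY_TEST(sqrt)
// would expand to
//   test_unary_builtins_for_scalar<DataType, DataLayout, Assignement,
//                                  op_sqrt>(sycl_device, tensor_range)
// (sqrt is used here purely for illustration).

// Next: unary builtins that return a boolean mask (isnan/isfinite/isinf).
// The test below allocates a bool device buffer for the result, evaluates
// out = op(in) on the device, and copies the mask back to the host.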
template <typename DataType, int DataLayout, typename Operator>
  in = in.random() + DataType(0.01);
  DataType *gpu_data = static_cast<DataType *>(
      sycl_device.allocate(in.size() * sizeof(DataType)));
  bool *gpu_data_out =
      static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool)));
  sycl_device.memcpyHostToDevice(gpu_data, in.data(),
                                 (in.size()) * sizeof(DataType));
  gpu_out.device(sycl_device) = op(gpu);
  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                 (out.size()) * sizeof(bool));
  sycl_device.deallocate(gpu_data);
  sycl_device.deallocate(gpu_data_out);
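
// test_unary_builtins: run the unary tests above for one DataType/DataLayout
// combination. Only the tails of its calls survive in this excerpt; each line
// below is the second half of a call that instantiates the bool-returning
// test with the named functor.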
template <typename DataType, int DataLayout>
      op_isnan>(sycl_device, tensor_range);
      op_isfinite>(sycl_device, tensor_range);
      op_isinf>(sycl_device, tensor_range);
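
// Per-scalar-type driver for the unary tests: exercise both RowMajor and
// ColMajor layouts on the given SYCL device.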
template <typename DataType>
  test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
  test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
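
// Binary builtin test: out = op(in_1, in_2) with two device-resident inputs.
// The structure mirrors the unary case: allocate device buffers, copy the
// inputs over, evaluate on the device, copy the result back, and verify
// against the host (verification loop elided).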
template <typename DataType, int DataLayout, typename Operator>
  in_1 = in_1.random() + DataType(0.01);
  in_2 = in_2.random() + DataType(0.01);
  DataType *gpu_data_1 = static_cast<DataType *>(
      sycl_device.allocate(in_1.size() * sizeof(DataType)));
  DataType *gpu_data_2 = static_cast<DataType *>(
      sycl_device.allocate(in_2.size() * sizeof(DataType)));
  DataType *gpu_data_out = static_cast<DataType *>(
      sycl_device.allocate(out.size() * sizeof(DataType)));
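  // (TensorMap views gpu_1, gpu_2, and gpu_out over these buffers are elided
  // in this excerpt.)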
  sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
                                 (in_1.size()) * sizeof(DataType));
  sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),
                                 (in_2.size()) * sizeof(DataType));
  gpu_out.device(sycl_device) = op(gpu_1, gpu_2);
  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                 (out.size()) * sizeof(DataType));
  sycl_device.deallocate(gpu_data_1);
  sycl_device.deallocate(gpu_data_2);
  sycl_device.deallocate(gpu_data_out);
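
// Binary builtin test with a fixed scalar second argument:
// out = op(in_1, arg2) where arg2 is the constant 2. This variant is what the
// integer driver below uses for op_modulo.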
template <typename DataType, int DataLayout, typename Operator>
  const DataType arg2(2);
  in_1 = in_1.random();
  DataType *gpu_data_1 = static_cast<DataType *>(
      sycl_device.allocate(in_1.size() * sizeof(DataType)));
  DataType *gpu_data_out = static_cast<DataType *>(
      sycl_device.allocate(out.size() * sizeof(DataType)));
  sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
                                 (in_1.size()) * sizeof(DataType));
  gpu_out.device(sycl_device) = op(gpu_1, arg2);
  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                 (out.size()) * sizeof(DataType));
  sycl_device.deallocate(gpu_data_1);
  sycl_device.deallocate(gpu_data_out);
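
// DECLARE_BINARY_STRUCT(FUNC) mirrors DECLARE_UNARY_STRUCT for two-argument
// builtins: a scalar overload forwarding to cl::sycl::FUNC(x, y) and a
// TensorMap overload forwarding to x.FUNC(y).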
#define DECLARE_BINARY_STRUCT(FUNC)                                                          \
  struct op_##FUNC {                                                                         \
    template <typename T1, typename T2>                                                      \
    auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) {            \
      return cl::sycl::FUNC(x, y);                                                           \
    }                                                                                        \
    template <typename T1, typename T2>                                                      \
    auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \
      return x.FUNC(y);                                                                      \
    }                                                                                        \
  };
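// e.g. DECLARE_BINARY_STRUCT(cwiseMax) yields the op_cwiseMax functor used
// below (an assumption based on the functor names referenced in
// test_binary_builtins).

// DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR) covers builtins that are spelled
// as infix operators rather than functions: it defines op_NAME whose call
// operator simply evaluates x OPERATOR y.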
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR)                          \
  struct op_##NAME {                                                      \
    template <typename T1, typename T2>                                   \
    auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \
      return x OPERATOR y;                                                \
    }                                                                     \
  };
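// e.g. DECLARE_BINARY_STRUCT_OP(plus, +) is presumably how the op_plus,
// op_minus, op_times, op_divide, and op_modulo functors used below are
// produced.

// test_binary_builtins: only the tails of its calls survive in this excerpt;
// each line below is the second half of a call running the binary test above
// with the named functor.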
      op_cwiseMax>(sycl_device, tensor_range);
      op_cwiseMin>(sycl_device, tensor_range);
      op_plus>(sycl_device, tensor_range);
      op_minus>(sycl_device, tensor_range);
      op_times>(sycl_device, tensor_range);
      op_divide>(sycl_device, tensor_range);
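
// Per-scalar-type driver for the floating-point binary tests, covering both
// layouts.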
template <typename DataType>
  test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
  test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
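
// Per-scalar-type driver for the integer binary tests: the two call tails
// below run the fixed-argument op_modulo test, presumably once per layout
// (RowMajor and ColMajor).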
template <typename DataType>
      op_modulo>(sycl_device, tensor_range);
      op_modulo>(sycl_device, tensor_range);
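
// Test entry point: iterate over every SYCL device Eigen reports as
// supported, wrap each in a QueueInterface/SyclDevice pair, and run the
// binary builtin sub-tests on it.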
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    QueueInterface queueInterface(device);
    Eigen::SyclDevice sycl_device(&queueInterface);
    CALL_SUBTEST_2(test_floating_builtin_binary_sycl<float>(sycl_device));
    CALL_SUBTEST_3(test_integer_builtin_binary_sycl<int>(sycl_device));
  }