14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 21 #include <unsupported/Eigen/CXX11/Tensor> 24 using Eigen::SyclDevice;
// Returns the cube (third power) of its argument for any multipliable type.
template <typename U>
U cube(U v) {
  const U squared = v * v;  // x^2, then one more multiply for x^3
  return squared * v;
}
42 template <
typename Lhs,
typename Rhs>
47 template <
typename Lhs,
typename Rhs>
52 typename Assignement,
typename Operator>
61 in = in.random() + DataType(0.01);
62 out = out.random() + DataType(0.01);
64 DataType *gpu_data =
static_cast<DataType *
>(
65 sycl_device.allocate(in.
size() *
sizeof(DataType)));
66 DataType *gpu_data_out =
static_cast<DataType *
>(
67 sycl_device.allocate(out.
size() *
sizeof(DataType)));
70 sycl_device.memcpyHostToDevice(gpu_data, in.
data(),
71 (in.
size()) *
sizeof(DataType));
72 sycl_device.memcpyHostToDevice(gpu_data_out, out.
data(),
73 (out.
size()) *
sizeof(DataType));
74 auto device_expr = gpu_out.
device(sycl_device);
75 asgn(device_expr, op(gpu));
76 sycl_device.memcpyDeviceToHost(out.
data(), gpu_data_out,
77 (out.
size()) *
sizeof(DataType));
79 DataType ver = reference(
i);
83 sycl_device.deallocate(gpu_data);
84 sycl_device.deallocate(gpu_data_out);
89 out = out.random() + DataType(0.01);
91 DataType *gpu_data_out =
static_cast<DataType *
>(
92 sycl_device.allocate(out.
size() *
sizeof(DataType)));
94 sycl_device.memcpyHostToDevice(gpu_data_out, out.
data(),
95 (out.
size()) *
sizeof(DataType));
96 auto device_expr = gpu_out.
device(sycl_device);
97 asgn(device_expr, op(gpu_out));
98 sycl_device.memcpyDeviceToHost(out.
data(), gpu_data_out,
99 (out.
size()) *
sizeof(DataType));
101 DataType ver = reference(
i);
102 asgn(ver, op(reference(
i)));
105 sycl_device.deallocate(gpu_data_out);
109 #define DECLARE_UNARY_STRUCT(FUNC) \ 111 template <typename T> \ 112 auto operator()(const T& x) -> decltype(cl::sycl::FUNC(x)) { \ 113 return cl::sycl::FUNC(x); \ 115 template <typename T> \ 116 auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \ 140 template <
typename DataType,
int DataLayout,
typename Assignement>
143 #define RUN_UNARY_TEST(FUNC) \ 144 test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \ 145 op_##FUNC>(sycl_device, tensor_range) 163 template <
typename DataType,
int DataLayout,
typename Operator>
170 in = in.random() + DataType(0.01);
171 DataType *gpu_data =
static_cast<DataType *
>(
172 sycl_device.allocate(in.
size() *
sizeof(DataType)));
174 static_cast<bool *
>(sycl_device.allocate(out.
size() *
sizeof(bool)));
177 sycl_device.memcpyHostToDevice(gpu_data, in.
data(),
178 (in.
size()) *
sizeof(DataType));
179 gpu_out.
device(sycl_device) = op(gpu);
180 sycl_device.memcpyDeviceToHost(out.
data(), gpu_data_out,
181 (out.
size()) *
sizeof(
bool));
185 sycl_device.deallocate(gpu_data);
186 sycl_device.deallocate(gpu_data_out);
189 template <
typename DataType,
int DataLayout>
197 op_isnan>(sycl_device, tensor_range);
199 op_isfinite>(sycl_device, tensor_range);
201 op_isinf>(sycl_device, tensor_range);
204 template <
typename DataType>
211 test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
212 test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
215 template <
typename DataType,
int DataLayout,
typename Operator>
223 in_1 = in_1.random() + DataType(0.01);
224 in_2 = in_2.random() + DataType(0.01);
226 DataType *gpu_data_1 =
static_cast<DataType *
>(
227 sycl_device.allocate(in_1.
size() *
sizeof(DataType)));
228 DataType *gpu_data_2 =
static_cast<DataType *
>(
229 sycl_device.allocate(in_2.
size() *
sizeof(DataType)));
230 DataType *gpu_data_out =
static_cast<DataType *
>(
231 sycl_device.allocate(out.
size() *
sizeof(DataType)));
235 sycl_device.memcpyHostToDevice(gpu_data_1, in_1.
data(),
236 (in_1.
size()) *
sizeof(DataType));
237 sycl_device.memcpyHostToDevice(gpu_data_2, in_2.
data(),
238 (in_2.
size()) *
sizeof(DataType));
239 gpu_out.
device(sycl_device) = op(gpu_1, gpu_2);
240 sycl_device.memcpyDeviceToHost(out.
data(), gpu_data_out,
241 (out.
size()) *
sizeof(DataType));
245 sycl_device.deallocate(gpu_data_1);
246 sycl_device.deallocate(gpu_data_2);
247 sycl_device.deallocate(gpu_data_out);
250 template <
typename DataType,
int DataLayout,
typename Operator>
255 const DataType arg2(2);
258 in_1 = in_1.random();
260 DataType *gpu_data_1 =
static_cast<DataType *
>(
261 sycl_device.allocate(in_1.
size() *
sizeof(DataType)));
262 DataType *gpu_data_out =
static_cast<DataType *
>(
263 sycl_device.allocate(out.
size() *
sizeof(DataType)));
266 sycl_device.memcpyHostToDevice(gpu_data_1, in_1.
data(),
267 (in_1.
size()) *
sizeof(DataType));
268 gpu_out.
device(sycl_device) = op(gpu_1, arg2);
269 sycl_device.memcpyDeviceToHost(out.
data(), gpu_data_out,
270 (out.
size()) *
sizeof(DataType));
274 sycl_device.deallocate(gpu_data_1);
275 sycl_device.deallocate(gpu_data_out);
278 #define DECLARE_BINARY_STRUCT(FUNC) \ 280 template <typename T1, typename T2> \ 281 auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) { \ 282 return cl::sycl::FUNC(x, y); \ 284 template <typename T1, typename T2> \ 285 auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \ 293 #define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR) \ 295 template <typename T1, typename T2> \ 296 auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \ 297 return x OPERATOR y; \ 311 op_cwiseMax>(sycl_device, tensor_range);
313 op_cwiseMin>(sycl_device, tensor_range);
315 op_plus>(sycl_device, tensor_range);
317 op_minus>(sycl_device, tensor_range);
319 op_times>(sycl_device, tensor_range);
321 op_divide>(sycl_device, tensor_range);
324 template <
typename DataType>
330 test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
331 test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
334 template <
typename DataType>
341 op_modulo>(sycl_device, tensor_range);
343 op_modulo>(sycl_device, tensor_range);
347 for (
const auto& device :Eigen::get_sycl_supported_devices()) {
348 QueueInterface queueInterface(device);
349 Eigen::SyclDevice sycl_device(&queueInterface);
351 CALL_SUBTEST_2(test_floating_builtin_binary_sycl<float>(sycl_device));
352 CALL_SUBTEST_3(test_integer_builtin_binary_sycl<int>(sycl_device));
EIGEN_DEVICE_FUNC const Log1pReturnType log1p() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
#define DECLARE_BINARY_STRUCT(FUNC)
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl)
std::ofstream out("Result.txt")
#define CALL_SUBTEST_3(FUNC)
static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device)
static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device)
EIGEN_DEVICE_FUNC const TanhReturnType tanh() const
Namespace containing all symbols from the Eigen library.
void test_unary_builtins_for_scalar(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
void test_binary_builtins_func(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
void operator()(Lhs &lhs, const Rhs &rhs)
EIGEN_DEVICE_FUNC const LogReturnType log() const
#define DECLARE_UNARY_STRUCT(FUNC)
EIGEN_DEVICE_FUNC const ExpReturnType exp() const
EIGEN_DEVICE_FUNC const FloorReturnType floor() const
void operator()(Lhs &lhs, const Rhs &rhs)
#define VERIFY_IS_APPROX(a, b)
#define VERIFY_IS_EQUAL(a, b)
EIGEN_DEVICE_FUNC const Expm1ReturnType expm1() const
void test_unary_builtins_return_bool(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
#define CALL_SUBTEST_1(FUNC)
A tensor expression mapping an existing array of data.
#define RUN_UNARY_TEST(FUNC)
void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
void test_unary_builtins_for_assignement(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
EIGEN_DEVICE_FUNC const SignReturnType sign() const
Eigen::Triplet< double > T
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR)
static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device)
void test_binary_builtins(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
EIGEN_DEVICE_FUNC const RsqrtReturnType rsqrt() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
#define CALL_SUBTEST_2(FUNC)
Jet< T, N > sqrt(const Jet< T, N > &f)
void test_unary_builtins(const Eigen::SyclDevice &sycl_device, const array< int64_t, 3 > &tensor_range)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
EIGEN_DEVICE_FUNC const CeilReturnType ceil() const
static const int DataLayout
EIGEN_DEVICE_FUNC const RoundReturnType round() const