#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;
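
// Exercises the Tensor customOp() API on a SYCL device: a custom unary op
// (InsertZeros) and a custom binary op (BatchMatMul), each defined by a
// functor that reports its output dimensions and evaluates into a
// preallocated output.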

template<typename TensorType>
struct InsertZeros {
  // The output is twice the input size in each of the two dimensions.
  DSizes<DenseIndex, 2> dimensions(const TensorType& input) const {
    DSizes<DenseIndex, 2> result;
    result[0] = input.dimension(0) * 2;
    result[1] = input.dimension(1) * 2;
    return result;
  }
  template <typename Output, typename Device>
  void eval(const TensorType& input, Output& output, const Device& device) const {
    // Scatter the input values to even coordinates of the output...
    array<DenseIndex, 2> strides;
    strides[0] = 2;
    strides[1] = 2;
    output.stride(strides).device(device) = input;

    // ...and write zeros at the interleaved odd coordinates.
    Eigen::DSizes<DenseIndex, 2> offsets(1, 1);
    Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0) - 1, output.dimension(1) - 1);
    output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
  }
};
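
// A custom-op functor supplies dimensions(), which reports the output shape
// before evaluation, and eval(), which writes the result through the given
// device. A minimal host-side sketch of how such a functor is consumed
// (a plain CPU tensor here; the SYCL test below maps device memory instead):
//
//   Eigen::Tensor<float, 2> in(3, 5);
//   in.setRandom();
//   Eigen::Tensor<float, 2> out =
//       in.customOp(InsertZeros<Eigen::Tensor<float, 2> >());  // 6 x 10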

template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_unary_op_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 3;
  IndexType sizeDim2 = 5;
  Eigen::array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
  Eigen::array<IndexType, 2> tensorResultRange = {{6, 10}};

  Eigen::Tensor<DataType, 2, DataLayout, IndexType> in1(tensorRange);
  Eigen::Tensor<DataType, 2, DataLayout, IndexType> out(tensorResultRange);
  DataType* gpu_in1_data = static_cast<DataType*>(
      sycl_device.allocate(in1.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(
      sycl_device.allocate(out.dimensions().TotalSize() * sizeof(DataType)));
  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > TensorType;
  TensorType gpu_in1(gpu_in1_data, tensorRange);
  TensorType gpu_out(gpu_out_data, tensorResultRange);

  in1.setRandom();
  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), in1.dimensions().TotalSize() * sizeof(DataType));

  // Evaluate the custom op on the SYCL device and copy the result back.
  gpu_out.device(sycl_device) = gpu_in1.customOp(InsertZeros<TensorType>());
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.dimensions().TotalSize() * sizeof(DataType));
  // Even coordinates must hold the input values...
  for (int i = 0; i < 6; i += 2) {
    for (int j = 0; j < 10; j += 2) {
      VERIFY_IS_EQUAL(out(i, j), in1(i / 2, j / 2));
    }
  }
  // ...and odd coordinates must hold the inserted zeros.
  for (int i = 1; i < 6; i += 2) {
    for (int j = 1; j < 10; j += 2) {
      VERIFY_IS_EQUAL(out(i, j), 0);
    }
  }
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_out_data);
}

template<typename TensorType>
struct BatchMatMul {
  // Result shape of a batched matrix product: rows of input1, columns of
  // input2, and the shared batch dimension.
  DSizes<DenseIndex, 3> dimensions(const TensorType& input1, const TensorType& input2) const {
    DSizes<DenseIndex, 3> result;
    result[0] = input1.dimension(0);
    result[1] = input2.dimension(1);
    result[2] = input2.dimension(2);
    return result;
  }
  template <typename Output, typename Device>
  void eval(const TensorType& input1, const TensorType& input2,
            Output& output, const Device& device) const {
    typedef typename TensorType::DimensionPair DimPair;
    array<DimPair, 1> dims;
    dims[0] = DimPair(1, 0);
    // Multiply slice by slice: chip<2>(i) extracts the i-th batch matrix.
    for (int64_t i = 0; i < output.dimension(2); ++i) {
      output.template chip<2>(i).device(device) =
          input1.template chip<2>(i).contract(input2.template chip<2>(i), dims);
    }
  }
};
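
// DimPair(1, 0) contracts dimension 1 of each input1 slice with dimension 0
// of the matching input2 slice, i.e. an ordinary matrix product per batch.
// A host-side sketch with the shapes used in the test below (an illustration,
// not part of the original test):
//
//   Eigen::Tensor<float, 3> a(2, 3, 5), b(3, 7, 5);
//   a.setRandom(); b.setRandom();
//   Eigen::Tensor<float, 3> c =
//       a.customOp(b, BatchMatMul<Eigen::Tensor<float, 3> >());  // 2 x 7 x 5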

template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_binary_op_sycl(const Eigen::SyclDevice& sycl_device) {
  Eigen::array<IndexType, 3> tensorRange1 = {{2, 3, 5}};
  Eigen::array<IndexType, 3> tensorRange2 = {{3, 7, 5}};
  Eigen::array<IndexType, 3> tensorResultRange = {{2, 7, 5}};

  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange1);
  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange2);
  Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorResultRange);
  DataType* gpu_in1_data = static_cast<DataType*>(
      sycl_device.allocate(in1.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_in2_data = static_cast<DataType*>(
      sycl_device.allocate(in2.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(
      sycl_device.allocate(out.dimensions().TotalSize() * sizeof(DataType)));
  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > TensorType;
  TensorType gpu_in1(gpu_in1_data, tensorRange1);
  TensorType gpu_in2(gpu_in2_data, tensorRange2);
  TensorType gpu_out(gpu_out_data, tensorResultRange);

  in1.setRandom();
  in2.setRandom();
  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), in1.dimensions().TotalSize() * sizeof(DataType));
  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), in2.dimensions().TotalSize() * sizeof(DataType));

  // Evaluate the custom binary op on the SYCL device and copy the result back.
  gpu_out.device(sycl_device) = gpu_in1.customOp(gpu_in2, BatchMatMul<TensorType>());
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.dimensions().TotalSize() * sizeof(DataType));
  // Check each batch slice against a contraction computed on the host.
  for (IndexType i = 0; i < 5; ++i) {
    typedef typename Eigen::Tensor<DataType, 3, DataLayout, IndexType>::DimensionPair DimPair;
    array<DimPair, 1> dims;
    dims[0] = DimPair(1, 0);
    Eigen::Tensor<DataType, 2, DataLayout, IndexType> reference =
        in1.template chip<2>(i).contract(in2.template chip<2>(i), dims);
    Eigen::TensorRef<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > val = out.template chip<2>(i);
    for (IndexType j = 0; j < 2; ++j) {
      for (IndexType k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(val(j, k), reference(j, k));
      }
    }
  }
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_in2_data);
  sycl_device.deallocate(gpu_out_data);
}

template <typename DataType, typename Dev_selector>
void custom_op_perDevice(Dev_selector s) {
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_custom_unary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_custom_unary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_custom_binary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_custom_binary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
}

EIGEN_DECLARE_TEST(cxx11_tensor_custom_op_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(custom_op_perDevice<float>(device));
  }
}