// Build-time switches for this test. They are defined ahead of the Tensor
// header include so that the header is preprocessed with them in effect.
// NOTE(review): the first two presumably turn off long double and complex
// instantiations in the test harness - confirm against the harness.
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
// Defines the macro EIGEN_DEFAULT_DENSE_INDEX_TYPE as int64_t.
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
// NOTE(review): presumably enables the SYCL code paths of the Tensor module - confirm.
18 #define EIGEN_USE_SYCL
21 #include <unsupported/Eigen/CXX11/Tensor>
// Lets the rest of the file write SyclDevice without the Eigen:: prefix.
24 using Eigen::SyclDevice;
// Fragment of a broadcast test parameterised on element type, storage layout
// and index type. The function signature, the tensor declarations and several
// body lines are not visible in this excerpt.
// NOTE(review): presumably this is test_broadcast_sycl_fixed, judging by the
// "fixed size" message printed near the end - confirm against the full file.
28 template <
typename DataType,
int DataLayout,
typename IndexType>
// Output extent per dimension = input extent * broadcast factor.
43 for (
size_t i = 0;
i < out_range.
size(); ++
i)
44 out_range[
i] = in_range[
i] * broadcasts[
i];
// Iterates over every entry of in_range; the loop body is not visible here.
49 for (
size_t i = 0;
i < in_range.
size(); ++
i)
// Set each input coefficient to its own linear index, converted to DataType.
53 for (IndexType
i = 0;
i < input.
size(); ++
i)
54 input(
i) =
static_cast<DataType
>(
i);
// Device buffer for the input: element count of `input` times sizeof(DataType) bytes.
56 DataType * gpu_in_data =
static_cast<DataType*
>(sycl_device.allocate(input.
dimensions().
TotalSize()*
sizeof(DataType)));
// Device buffer for the result, sized from the dimensions of `out`.
57 DataType * gpu_out_data =
static_cast<DataType*
>(sycl_device.allocate(
out.dimensions().TotalSize()*
sizeof(DataType)));
// Assign the broadcast of gpu_in to gpu_out, evaluated through sycl_device.
// (The declarations of gpu_in / gpu_out are not visible in this excerpt.)
62 gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
// Copy the whole result buffer back into the host tensor `out`.
63 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data,(
out.dimensions().TotalSize())*
sizeof(DataType));
// Four nested loops covering every index of the broadcast result
// (inDimN * bDimN per dimension). The innermost body is not visible here.
// NOTE(review): presumably it compares `out` against `input` - confirm.
65 for (IndexType
i = 0;
i < inDim1*bDim1; ++
i) {
66 for (IndexType
j = 0;
j < inDim2*bDim2; ++
j) {
67 for (IndexType k = 0; k < inDim3*bDim3; ++k) {
68 for (IndexType
l = 0;
l < inDim4*bDim4; ++
l) {
74 printf(
"Broadcast Test with fixed size Passed\n");
// Release both device buffers allocated above.
75 sycl_device.deallocate(gpu_in_data);
76 sycl_device.deallocate(gpu_out_data);
// Fragment of a second broadcast test with the same template parameters
// (element type, storage layout, index type). Its signature, the tensor
// declarations and parts of the body are missing from this excerpt.
// NOTE(review): presumably this is test_broadcast_sycl, given the plain
// "Broadcast Test Passed" message below - confirm against the full file.
79 template <
typename DataType,
int DataLayout,
typename IndexType>
// Compute the result shape: each extent is the input extent scaled by the
// corresponding broadcast factor.
94 for (
size_t i = 0;
i < out_range.
size(); ++
i)
95 out_range[
i] = in_range[
i] * broadcasts[
i];
// Walks all dimensions of in_range; what is done per dimension is not visible here.
100 for (
size_t i = 0;
i < in_range.
size(); ++
i)
// Populate the host input so that coefficient i holds the value i (as DataType).
104 for (IndexType
i = 0;
i < input.
size(); ++
i)
105 input(
i) =
static_cast<DataType
>(
i);
// Allocate the device-side input buffer; byte size is TotalSize() * sizeof(DataType).
107 DataType * gpu_in_data =
static_cast<DataType*
>(sycl_device.allocate(input.
dimensions().
TotalSize()*
sizeof(DataType)));
// Allocate the device-side output buffer using the extents of `out`.
108 DataType * gpu_out_data =
static_cast<DataType*
>(sycl_device.allocate(
out.dimensions().TotalSize()*
sizeof(DataType)));
// Run the broadcast expression via sycl_device, writing into gpu_out.
// (gpu_in and gpu_out are declared on lines not present in this excerpt.)
113 gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
// Bring the result back to the host: all bytes of gpu_out_data into out.data().
114 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data,(
out.dimensions().TotalSize())*
sizeof(DataType));
// Visit every (i, j, k, l) index of the enlarged result; each bound is the
// input extent multiplied by its broadcast factor. The per-element body is
// not visible in this excerpt.
// NOTE(review): presumably a value check against the input - confirm.
116 for (IndexType
i = 0;
i < inDim1*bDim1; ++
i) {
117 for (IndexType
j = 0;
j < inDim2*bDim2; ++
j) {
118 for (IndexType k = 0; k < inDim3*bDim3; ++k) {
119 for (IndexType
l = 0;
l < inDim4*bDim4; ++
l) {
125 printf(
"Broadcast Test Passed\n");
// Free the two device allocations made earlier in this function.
126 sycl_device.deallocate(gpu_in_data);
127 sycl_device.deallocate(gpu_out_data);
// Body fragment of the per-device driver; its template header and signature
// are not visible in this excerpt.
// NOTE(review): presumably sycl_broadcast_test_per_device, which is the name
// invoked by the device loop later in the file - confirm.
// Report which device `d` the tests are about to run on.
131 std::cout <<
"Running on " <<
d.template get_info<cl::sycl::info::device::name>() << std::endl;
// Build a queue interface for this device, then a SyclDevice from its address.
132 QueueInterface queueInterface(
d);
133 auto sycl_device = Eigen::SyclDevice(&queueInterface);
// Run both broadcast tests in both storage layouts, always with int64_t indices.
134 test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
135 test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
136 test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
137 test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
// Run the float instantiation of the per-device test once for every device
// returned by Eigen::get_sycl_supported_devices(). The enclosing test entry
// point and the loop's closing brace are not visible in this excerpt.
141 for (
const auto& device :Eigen::get_sycl_supported_devices()) {
142 CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));