14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 21 #include <unsupported/Eigen/CXX11/Tensor> 24 using Eigen::SyclDevice;
28 template <
typename DataType,
int DataLayout,
typename IndexType>
43 for (
size_t i = 0;
i < out_range.
size(); ++
i)
44 out_range[
i] = in_range[
i] * broadcasts[
i];
49 for (
size_t i = 0;
i < in_range.
size(); ++
i)
53 for (IndexType
i = 0;
i < input.
size(); ++
i)
54 input(
i) =
static_cast<DataType
>(
i);
56 DataType * gpu_in_data =
static_cast<DataType*
>(sycl_device.allocate(input.
dimensions().
TotalSize()*
sizeof(DataType)));
57 DataType * gpu_out_data =
static_cast<DataType*
>(sycl_device.allocate(out.
dimensions().
TotalSize()*
sizeof(DataType)));
62 gpu_out.
device(sycl_device) = gpu_in.broadcast(broadcasts);
65 for (IndexType
i = 0;
i < inDim1*bDim1; ++
i) {
66 for (IndexType
j = 0;
j < inDim2*bDim2; ++
j) {
67 for (IndexType k = 0; k < inDim3*bDim3; ++k) {
68 for (IndexType
l = 0;
l < inDim4*bDim4; ++
l) {
74 printf(
"Broadcast Test with fixed size Passed\n");
75 sycl_device.deallocate(gpu_in_data);
76 sycl_device.deallocate(gpu_out_data);
79 template <
typename DataType,
int DataLayout,
typename IndexType>
94 for (
size_t i = 0;
i < out_range.
size(); ++
i)
95 out_range[
i] = in_range[
i] * broadcasts[
i];
100 for (
size_t i = 0;
i < in_range.
size(); ++
i)
104 for (IndexType
i = 0;
i < input.
size(); ++
i)
105 input(
i) =
static_cast<DataType
>(
i);
107 DataType * gpu_in_data =
static_cast<DataType*
>(sycl_device.allocate(input.
dimensions().
TotalSize()*
sizeof(DataType)));
108 DataType * gpu_out_data =
static_cast<DataType*
>(sycl_device.allocate(out.
dimensions().
TotalSize()*
sizeof(DataType)));
113 gpu_out.
device(sycl_device) = gpu_in.broadcast(broadcasts);
116 for (IndexType
i = 0;
i < inDim1*bDim1; ++
i) {
117 for (IndexType
j = 0;
j < inDim2*bDim2; ++
j) {
118 for (IndexType k = 0; k < inDim3*bDim3; ++k) {
119 for (IndexType
l = 0;
l < inDim4*bDim4; ++
l) {
125 printf(
"Broadcast Test Passed\n");
126 sycl_device.deallocate(gpu_in_data);
127 sycl_device.deallocate(gpu_out_data);
131 std::cout <<
"Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
132 QueueInterface queueInterface(d);
133 auto sycl_device = Eigen::SyclDevice(&queueInterface);
134 test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
135 test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
136 test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
137 test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
141 for (
const auto& device :Eigen::get_sycl_supported_devices()) {
142 CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
EIGEN_DECLARE_TEST(cxx11_tensor_broadcast_sycl)
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::size_t size()
std::ofstream out("Result.txt")
#define VERIFY_IS_APPROX(a, b)
static const Line3 l(Rot3(), 1, 1)
#define VERIFY_IS_EQUAL(a, b)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const
A tensor expression mapping an existing array of data.
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
#define CALL_SUBTEST(FUNC)
static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const
void sycl_broadcast_test_per_device(const cl::sycl::device &d)
static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
static const int DataLayout