14 #define EIGEN_TEST_NO_LONGDOUBLE 
   15 #define EIGEN_TEST_NO_COMPLEX 
   17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 
   18 #define EIGEN_USE_SYCL 
   21 #include <unsupported/Eigen/CXX11/Tensor> 
   24 using Eigen::SyclDevice;
 
   // Fragment of a templated tensor-broadcast test, parameterised on the scalar
   // type, the storage layout and the index type. NOTE(review): the function
   // signature and several declarations (in_range, broadcasts, out_range, input,
   // out, gpu_in, gpu_out, inDim*/bDim*) sit on lines that are not visible in
   // this excerpt, so their exact types cannot be confirmed from here.
   28 template <
typename DataType, 
int DataLayout, 
typename IndexType>
 
   // Each output extent is the matching input extent multiplied by its
   // broadcast factor.
   43   for (
size_t i = 0; 
i < out_range.
size(); ++
i)
 
   44     out_range[
i] = in_range[
i] * broadcasts[
i];
 
   // Loops over every entry of in_range; the loop body is not visible in this
   // excerpt.
   49   for (
size_t i = 0; 
i < in_range.
size(); ++
i)
 
   // Fill the input so that element i holds the value i converted to DataType.
   53   for (IndexType 
i = 0; 
i < input.
size(); ++
i)
 
   54     input(
i) = 
static_cast<DataType
>(
i);
 
   // Buffer obtained from sycl_device, sized as (number of elements of input)
   // * sizeof(DataType).
   56   DataType * gpu_in_data  = 
static_cast<DataType*
>(sycl_device.allocate(input.
dimensions().
TotalSize()*
sizeof(DataType)));
 
   // Buffer obtained from sycl_device, sized as (number of elements of out)
   // * sizeof(DataType).
   57   DataType * gpu_out_data  = 
static_cast<DataType*
>(sycl_device.allocate(
out.dimensions().TotalSize()*
sizeof(DataType)));
 
   // Evaluate the broadcast of gpu_in on sycl_device and store it in gpu_out.
   // Presumably gpu_in/gpu_out wrap the two buffers above -- TODO confirm
   // against the declarations that are not visible here.
   62   gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
 
   // Copy out.dimensions().TotalSize() * sizeof(DataType) bytes from
   // gpu_out_data into the storage of the tensor `out`.
   63   sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data,(
out.dimensions().TotalSize())*
sizeof(DataType));
 
   // Four nested loops, one per dimension, each running up to
   // (input extent * broadcast factor). The innermost body is not visible in
   // this excerpt -- presumably it checks the broadcast result.
   65   for (IndexType 
i = 0; 
i < inDim1*bDim1; ++
i) {
 
   66     for (IndexType 
j = 0; 
j < inDim2*bDim2; ++
j) {
 
   67       for (IndexType 
k = 0; 
k < inDim3*bDim3; ++
k) {
 
   68         for (IndexType 
l = 0; 
l < inDim4*bDim4; ++
l) {
 
   // Report completion of this test variant on stdout.
   74   printf(
"Broadcast Test with fixed size Passed\n");
 
   // Hand both buffers allocated above back to sycl_device.
   75   sycl_device.deallocate(gpu_in_data);
 
   76   sycl_device.deallocate(gpu_out_data);
 
   // Fragment of a second templated tensor-broadcast test with the same three
   // template parameters (scalar type, layout, index type). NOTE(review): the
   // signature and the declarations of in_range, broadcasts, out_range, input,
   // out, gpu_in, gpu_out and inDim*/bDim* are on lines not visible in this
   // excerpt; only the statements below can be described with certainty.
   79 template <
typename DataType, 
int DataLayout, 
typename IndexType>
 
   // out_range[i] = in_range[i] * broadcasts[i] for every dimension.
   94   for (
size_t i = 0; 
i < out_range.
size(); ++
i)
 
   95     out_range[
i] = in_range[
i] * broadcasts[
i];
 
  // Iterates over each entry of in_range; what is done per entry is not
  // visible in this excerpt.
  100   for (
size_t i = 0; 
i < in_range.
size(); ++
i)
 
  // Initialise input(i) with the value i cast to DataType.
  104   for (IndexType 
i = 0; 
i < input.
size(); ++
i)
 
  105     input(
i) = 
static_cast<DataType
>(
i);
 
  // Request input.dimensions().TotalSize() * sizeof(DataType) bytes from
  // sycl_device for the input data.
  107   DataType * gpu_in_data  = 
static_cast<DataType*
>(sycl_device.allocate(input.
dimensions().
TotalSize()*
sizeof(DataType)));
 
  // Request out.dimensions().TotalSize() * sizeof(DataType) bytes from
  // sycl_device for the output data.
  108   DataType * gpu_out_data  = 
static_cast<DataType*
>(sycl_device.allocate(
out.dimensions().TotalSize()*
sizeof(DataType)));
 
  // Run the broadcast expression on sycl_device, writing into gpu_out.
  // Presumably gpu_in/gpu_out are views over the buffers above -- TODO confirm.
  113   gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
 
  // Transfer the whole output (TotalSize() * sizeof(DataType) bytes) from
  // gpu_out_data to out.data().
  114   sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data,(
out.dimensions().TotalSize())*
sizeof(DataType));
 
  // Nested traversal of all four output dimensions; each bound is the input
  // extent times the broadcast factor. The innermost body is not visible here
  // -- presumably the result verification.
  116   for (IndexType 
i = 0; 
i < inDim1*bDim1; ++
i) {
 
  117     for (IndexType 
j = 0; 
j < inDim2*bDim2; ++
j) {
 
  118       for (IndexType 
k = 0; 
k < inDim3*bDim3; ++
k) {
 
  119         for (IndexType 
l = 0; 
l < inDim4*bDim4; ++
l) {
 
  // Print a completion message for this test variant.
  125   printf(
"Broadcast Test Passed\n");
 
  // Release the two buffers obtained from sycl_device.allocate.
  126   sycl_device.deallocate(gpu_in_data);
 
  127   sycl_device.deallocate(gpu_out_data);
 
  // Fragment of the per-device test driver. NOTE(review): the signature is not
  // visible in this excerpt; `d` and `DataType` are declared there.
  // Print the name reported by the device `d`.
  131   std::cout << 
"Running on " << 
d.template get_info<cl::sycl::info::device::name>() << std::endl;
 
  // Build a QueueInterface from `d`, then an Eigen::SyclDevice from its
  // address. Because only a pointer is handed over, queueInterface presumably
  // has to stay alive for as long as sycl_device is used -- TODO confirm.
  132   QueueInterface queueInterface(
d);
 
  133   auto sycl_device = Eigen::SyclDevice(&queueInterface);
 
  // Run both broadcast test variants, each for the RowMajor and ColMajor
  // layouts, always with int64_t as the index type.
  134   test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
 
  135   test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
 
  136   test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
 
  137   test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
 
  // Fragment of the test entry point (its declaration and closing braces are
  // not visible in this excerpt). For every device returned by
  // Eigen::get_sycl_supported_devices(), run the float instantiation of
  // sycl_broadcast_test_per_device as a subtest.
  141   for (
const auto& device :Eigen::get_sycl_supported_devices()) {
 
  142     CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));