14 #define EIGEN_TEST_NO_LONGDOUBLE 
   15 #define EIGEN_TEST_NO_COMPLEX 
   17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 
   18 #define EIGEN_USE_SYCL 
   21 #include <unsupported/Eigen/CXX11/Tensor> 
   // Fragment of a function template parameterised on the element type, the
   // tensor layout and the index type.
   // NOTE(review): the signature line, the tensor declarations, the loop bodies
   // and the closing braces are absent from this listing. Presumably this is
   // test_simple_concatenation, which is instantiated with the same three
   // template arguments further down -- confirm against the full file.
   25 template<
typename DataType, 
int DataLayout, 
typename IndexType>
 
        // Extents used for the left operand: 2 x 3 x 1.
   28   IndexType leftDim1 = 2;
 
   29   IndexType leftDim2 = 3;
 
   30   IndexType leftDim3 = 1;
 
        // Extents used for the right operand: also 2 x 3 x 1.
   32   IndexType rightDim1 = 2;
 
   33   IndexType rightDim2 = 3;
 
   34   IndexType rightDim3 = 1;
 
        // Buffers obtained from sycl_device, each sized to hold every element
        // of `left` / `right` respectively.
   47   DataType * gpu_in1_data  = 
static_cast<DataType*
>(sycl_device.allocate(
left.dimensions().TotalSize()*
sizeof(DataType)));
 
   48   DataType * gpu_in2_data  = 
static_cast<DataType*
>(sycl_device.allocate(
right.dimensions().TotalSize()*
sizeof(DataType)));
 
        // Upload the host contents of `left` and `right` into those buffers.
   52   sycl_device.memcpyHostToDevice(gpu_in1_data, 
left.data(),(
left.dimensions().TotalSize())*
sizeof(DataType));
 
   53   sycl_device.memcpyHostToDevice(gpu_in2_data, 
right.data(),(
right.dimensions().TotalSize())*
sizeof(DataType));
 
        // --- Case 1: concatenate along dimension 0 ---
        // Output buffer sized from `concatenation1`.
   56   DataType * gpu_out_data1 =  
static_cast<DataType*
>(sycl_device.allocate(concatenation1.
dimensions().
TotalSize()*
sizeof(DataType)));
 
        // Evaluate the concatenation expression on sycl_device.
   60   gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0);
 
        // Copy the result back into the host storage of `concatenation1`.
   61   sycl_device.memcpyDeviceToHost(concatenation1.
data(), gpu_out_data1,(concatenation1.
dimensions().
TotalSize())*
sizeof(DataType));
 
        // j covers 0..2; i covers 0..1 and then 2..3.
        // NOTE(review): the loop bodies are not shown. Presumably the first i
        // range is checked against `left` and the second against `right`
        // (2 + 2 along dimension 0) -- confirm.
   66   for (IndexType 
j = 0; 
j < 3; ++
j) {
 
   67     for (IndexType 
i = 0; 
i < 2; ++
i) {
 
   70     for (IndexType 
i = 2; 
i < 4; ++
i) {
 
        // Release the output buffer of case 1.
   75   sycl_device.deallocate(gpu_out_data1);
 
        // --- Case 2: concatenate along dimension 1 ---
        // Output buffer sized from `concatenation2`.
   77   DataType * gpu_out_data2 =  
static_cast<DataType*
>(sycl_device.allocate(concatenation2.
dimensions().
TotalSize()*
sizeof(DataType)));
 
   79   gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1);
 
        // Copy the result back into the host storage of `concatenation2`.
   80   sycl_device.memcpyDeviceToHost(concatenation2.
data(), gpu_out_data2,(concatenation2.
dimensions().
TotalSize())*
sizeof(DataType));
 
        // i covers 0..1; j covers 0..2 and then 3..5.
        // NOTE(review): loop bodies are not shown; presumably the two j ranges
        // correspond to the `left` and `right` halves (3 + 3 along
        // dimension 1) -- confirm.
   86   for (IndexType 
i = 0; 
i < 2; ++
i) {
 
   87     for (IndexType 
j = 0; 
j < 3; ++
j) {
 
   90     for (IndexType 
j = 3; 
j < 6; ++
j) {
 
        // Release the output buffer of case 2.
   94   sycl_device.deallocate(gpu_out_data2);
 
        // --- Case 3: concatenate along dimension 2 ---
        // Output buffer sized from `concatenation3`.
   96   DataType * gpu_out_data3 =  
static_cast<DataType*
>(sycl_device.allocate(concatenation3.
dimensions().
TotalSize()*
sizeof(DataType)));
 
   98   gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2);
 
        // Copy the result back into the host storage of `concatenation3`.
   99   sycl_device.memcpyDeviceToHost(concatenation3.
data(), gpu_out_data3,(concatenation3.
dimensions().
TotalSize())*
sizeof(DataType));
 
        // i covers 0..1 and j covers 0..2.
        // NOTE(review): the loop body is not shown in this listing.
  105   for (IndexType 
i = 0; 
i < 2; ++
i) {
 
  106     for (IndexType 
j = 0; 
j < 3; ++
j) {
 
        // Release the last output buffer and both input buffers.
  111   sycl_device.deallocate(gpu_out_data3);
 
  112   sycl_device.deallocate(gpu_in1_data);
 
  113   sycl_device.deallocate(gpu_in2_data);
 
  // Fragment of a second function template with the same three template
  // parameters (element type, layout, index type).
  // NOTE(review): the signature line, the tensor declarations, the loop body
  // and the closing braces are absent from this listing. Presumably this is
  // test_concatenation_as_lvalue, instantiated further down -- confirm
  // against the full file.
  115 template<
typename DataType, 
int DataLayout, 
typename IndexType>
 
        // Extents used for the left and right operands: 2 x 3 each.
  119   IndexType leftDim1 = 2;
 
  120   IndexType leftDim2 = 3;
 
  123   IndexType rightDim1 = 2;
 
  124   IndexType rightDim2 = 3;
 
        // Extents of the concatenated shape: 4 x 3 (2 + 2 along dimension 0).
  127   IndexType concatDim1 = 4;
 
  128   IndexType concatDim2 = 3;
 
        // Buffers obtained from sycl_device, sized from `left`, `right` and
        // `result` respectively.
  139   DataType * gpu_in1_data  = 
static_cast<DataType*
>(sycl_device.allocate(
left.dimensions().TotalSize()*
sizeof(DataType)));
 
  140   DataType * gpu_in2_data  = 
static_cast<DataType*
>(sycl_device.allocate(
right.dimensions().TotalSize()*
sizeof(DataType)));
 
  141   DataType * gpu_out_data =  
static_cast<DataType*
>(sycl_device.allocate(
result.dimensions().TotalSize()*
sizeof(DataType)));
 
        // Upload the host contents of all three tensors.
  148   sycl_device.memcpyHostToDevice(gpu_in1_data, 
left.data(),(
left.dimensions().TotalSize())*
sizeof(DataType));
 
  149   sycl_device.memcpyHostToDevice(gpu_in2_data, 
right.data(),(
right.dimensions().TotalSize())*
sizeof(DataType));
 
  150   sycl_device.memcpyHostToDevice(gpu_out_data, 
result.data(),(
result.dimensions().TotalSize())*
sizeof(DataType));
 
        // The concatenation expression (along dimension 0) is the assignment
        // TARGET here: gpu_out is assigned into it.
        // NOTE(review): presumably this writes through to the buffers behind
        // gpu_in1 and gpu_in2, which is why both are copied back next --
        // confirm.
  153  gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out;
 
        // Copy both input buffers back into the host storage of `left` and
        // `right`.
  154  sycl_device.memcpyDeviceToHost(
left.data(), gpu_in1_data,(
left.dimensions().TotalSize())*
sizeof(DataType));
 
  155  sycl_device.memcpyDeviceToHost(
right.data(), gpu_in2_data,(
right.dimensions().TotalSize())*
sizeof(DataType));
 
        // i covers 0..1 and j covers 0..2.
        // NOTE(review): the loop body is not shown in this listing.
  157   for (IndexType 
i = 0; 
i < 2; ++
i) {
 
  158     for (IndexType 
j = 0; 
j < 3; ++
j) {
 
        // Release all three buffers.
  163   sycl_device.deallocate(gpu_in1_data);
 
  164   sycl_device.deallocate(gpu_in2_data);
 
  165   sycl_device.deallocate(gpu_out_data);
 
  // Body fragment of a per-device driver.
  // NOTE(review): the enclosing function header is not visible in this
  // listing; `s` and `DataType` are presumably its parameter and template
  // parameter -- confirm against the full file.
  // Wrap `s` in a QueueInterface and build an Eigen::SyclDevice from its address.
  170   QueueInterface queueInterface(
s);
 
  171   auto sycl_device = Eigen::SyclDevice(&queueInterface);
 
        // The simple concatenation test is instantiated for both RowMajor and
        // ColMajor with int64_t indices.
  172   test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device);
 
  173   test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device);
 
        // The lvalue test is instantiated for ColMajor only.
  174   test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
 
  177   for (
const auto& device :Eigen::get_sycl_supported_devices()) {