14 #define EIGEN_TEST_NO_LONGDOUBLE 
   15 #define EIGEN_TEST_NO_COMPLEX 
   17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 
   18 #define EIGEN_USE_SYCL 
   19 #define EIGEN_HAS_CONSTEXPR 1 
   23 #include <unsupported/Eigen/CXX11/Tensor> 
   26 using Eigen::SyclDevice;
 
   // NOTE(review): this listing is a lossy extract. Each code line still carries its
   // original line number, and several original lines (the function signature, the
   // tensor/TensorMap declarations, the output allocations and every assertion) are not
   // visible. Presumably this is test_sycl_simple_argmax -- confirm against the full file.
   //
   // Visible flow: prepare the host tensor `in`, upload it to a device buffer, evaluate
   // the full (no-axis) argmax() and argmin() on the SYCL device, download both results,
   // then release the three device buffers.
   30 template <
typename DataType, 
int Layout, 
typename DenseIndex>
 
   // Scale every element of `in` by 100 (how `in` was filled is not visible here).
   36   in *= in.constant(100.0);
 
   // Overwrite element (0, 0, 0) with -1000.0; presumably this makes it the unique
   // minimum so argmin has a known answer -- TODO confirm against the missing checks.
   37   in(0, 0, 0) = -1000.0;
 
   // Size of the input in bytes, used for the device allocation and the upload below.
   40   std::size_t in_bytes = in.size() * 
sizeof(DataType);
 
   // allocate() result is cast to a typed pointer for the input buffer.
   43   DataType* d_in = 
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
 
   // Upload the prepared host data into the device buffer.
   51   sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);
 
   // Evaluate both reductions on the SYCL device; gpu_in / gpu_out_max / gpu_out_min are
   // declared on lines not visible here (presumably maps over the d_* buffers).
   53   gpu_out_max.device(sycl_device) = gpu_in.argmax();
 
   54   gpu_out_min.device(sycl_device) = gpu_in.argmin();
 
   // Download both results into the host-side out_max / out_min (out_bytes is defined on
   // a line not visible here).
   56   sycl_device.memcpyDeviceToHost(out_max.
data(), d_out_max, out_bytes);
 
   57   sycl_device.memcpyDeviceToHost(out_min.
data(), d_out_min, out_bytes);
 
   // Release every device buffer used by this test.
   62   sycl_device.deallocate(d_in);
 
   63   sycl_device.deallocate(d_out_max);
 
   64   sycl_device.deallocate(d_out_min);
 
   // NOTE(review): lossy extract -- the function signature, the loops over `dim` and over
   // the index `ix`, the tensor declarations and all assertions are on lines not visible
   // here. Presumably this is test_sycl_argmax_dim -- confirm against the full file.
   //
   // Visible flow: for a given `dim`, fill a 4-D tensor so the maximum along `dim` sits at
   // a known position, run argmax(dim) on the SYCL device and download the result; this is
   // done twice (maximum at index 0, then at the last index) before the buffers are freed.
   67 template <
typename DataType, 
int DataLayout, 
typename DenseIndex>
 
   // Collect the four input extents so the output shape can be derived by index.
   75   std::vector<DenseIndex> dims;
 
   76   dims.push_back(sizeDim0);
 
   77   dims.push_back(sizeDim1);
 
   78   dims.push_back(sizeDim2);
 
   79   dims.push_back(sizeDim3);
 
   // Output shape = input extents with entry `dim` skipped: entries before `dim` are
   // copied as-is, entries from `dim` onward are taken from one position further along.
   82     for (
DenseIndex d = 0; 
d < 3; ++
d) out_shape[
d] = (
d < dim) ? dims[
d] : dims[
d + 1];
 
   // First pattern: 10.0 where the coordinate along `dim` is 0, -1.0 everywhere else, so
   // the largest value along `dim` is at index 0.
   97             tensor(ix) = (ix[dim] != 0) ? -1.0 : 10.0;
 
   // Device buffer for the input (in_bytes is defined on a line not visible here).
  106     DataType* d_in = 
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
 
   // Upload, reduce along `dim` on the device, download the result into tensor_arg.
  113     sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
 
  114     gpu_out.device(sycl_device) = gpu_in.argmax(dim);
 
  115     sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
 
   // Total element count divided by the extent of `dim`. The statement this expression
   // belongs to starts on a line not visible here (presumably a size check on the output).
  118                     size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.
dimension(dim)));
 
   // Explicit device synchronisation before the host tensor is refilled below.
  125     sycl_device.synchronize();
 
   // Second pattern: 20.0 at the last coordinate along `dim`, -1.0 everywhere else, so the
   // largest value along `dim` is now at index dimension(dim) - 1.
  136             tensor(ix) = (ix[dim] != tensor.
dimension(dim) - 1) ? -1.0 : 20.0;
 
   // Same upload / reduce / download sequence, reusing the existing device buffers.
  142     sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
 
  143     gpu_out.device(sycl_device) = gpu_in.argmax(dim);
 
  144     sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
 
   // Release the device buffers (d_out is allocated on a line not visible here).
  150     sycl_device.deallocate(d_in);
 
  151     sycl_device.deallocate(d_out);
 
  // NOTE(review): lossy extract -- the function signature, the loops over `dim` and over
  // the index `ix`, the tensor declarations and all assertions are on lines not visible
  // here. Presumably this is test_sycl_argmin_dim -- confirm against the full file.
  //
  // Visible flow: for a given `dim`, fill a 4-D tensor so the minimum along `dim` sits at
  // a known position, run argmin(dim) on the SYCL device and download the result; this is
  // done twice (minimum at index 0, then at the last index) before the buffers are freed.
  155 template <
typename DataType, 
int DataLayout, 
typename DenseIndex>
 
  // Collect the four input extents so the output shape can be derived by index.
  163   std::vector<DenseIndex> dims;
 
  164   dims.push_back(sizeDim0);
 
  165   dims.push_back(sizeDim1);
 
  166   dims.push_back(sizeDim2);
 
  167   dims.push_back(sizeDim3);
 
  // Output shape = input extents with entry `dim` skipped: entries before `dim` are
  // copied as-is, entries from `dim` onward are taken from one position further along.
  170     for (
DenseIndex d = 0; 
d < 3; ++
d) out_shape[
d] = (
d < dim) ? dims[
d] : dims[
d + 1];
 
  // First pattern: -10.0 where the coordinate along `dim` is 0, 1.0 everywhere else, so
  // the smallest value along `dim` is at index 0.
  184             tensor(ix) = (ix[dim] != 0) ? 1.0 : -10.0;
 
  // Device buffer for the input (in_bytes is defined on a line not visible here).
  193     DataType* d_in = 
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
 
  // Upload, reduce along `dim` on the device, download the result into tensor_arg.
  200     sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
 
  201     gpu_out.device(sycl_device) = gpu_in.argmin(dim);
 
  202     sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
 
  // Total element count divided by the extent of `dim`. The statement this expression
  // belongs to starts on a line not visible here (presumably a size check on the output).
  205                     size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.
dimension(dim)));
 
  // Explicit device synchronisation before the host tensor is refilled below.
  212     sycl_device.synchronize();
 
  // Second pattern: -20.0 at the last coordinate along `dim`, 1.0 everywhere else, so the
  // smallest value along `dim` is now at index dimension(dim) - 1.
  223             tensor(ix) = (ix[dim] != tensor.
dimension(dim) - 1) ? 1.0 : -20.0;
 
  // Same upload / reduce / download sequence, reusing the existing device buffers.
  229     sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
 
  230     gpu_out.device(sycl_device) = gpu_in.argmin(dim);
 
  231     sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
 
  // Release the device buffers (d_out is allocated on a line not visible here).
  237     sycl_device.deallocate(d_in);
 
  238     sycl_device.deallocate(d_out);
 
  // NOTE(review): lossy extract -- the signature line is not visible. Presumably this is
  // sycl_argmax_test_per_device, taking the device selector `d` -- confirm against the
  // full file.
  //
  // Builds a SyclDevice for one device selector and runs every argmax/argmin test in
  // this file against it, in both RowMajor and ColMajor layouts, with int64_t indices.
  242 template <
typename DataType, 
typename Device_Selector>
 
  // Queue interface built from the selector; the SyclDevice is constructed from its address.
  244   QueueInterface queueInterface(
d);
 
  245   auto sycl_device = Eigen::SyclDevice(&queueInterface);
 
  // Full-tensor argmax/argmin, one run per layout.
  246   test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
 
  247   test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
 
  // argmax along a single dimension, one run per layout.
  248   test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
 
  249   test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
 
  // argmin along a single dimension, one run per layout.
  250   test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
 
  251   test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
 
  255   for (
const auto& device : Eigen::get_sycl_supported_devices()) {
 
  256     CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));