14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19 #define EIGEN_HAS_CONSTEXPR 1
23 #include <unsupported/Eigen/CXX11/Tensor>
26 using Eigen::SyclDevice;
// NOTE(review): this chunk is a garbled extraction — original-file line numbers
// ("30", "36", ...) are fused into the code lines, and original lines 31-35
// (the function signature, host tensors `in`/`out_max`/`out_min`, and the
// device-side TensorMaps `gpu_in`/`gpu_out_max`/`gpu_out_min`) are missing.
// Presumably this is test_sycl_simple_argmax from Eigen's SYCL tensor-argmax
// test — TODO confirm against the full file before editing.
30 template <
typename DataType,
int Layout,
typename DenseIndex>
// Scale the (randomly initialized, per the usual test pattern — not visible
// here) input, then plant a known extremum at linear index 0 so both argmax
// and argmin have a deterministic expected answer.
36 in *= in.constant(100.0);
37 in(0, 0, 0) = -1000.0;
// Byte size of the input tensor for the device allocation/copies below.
40 std::size_t in_bytes = in.size() *
sizeof(DataType);
// Raw device buffer for the input; matching d_out_max/d_out_min allocations
// are in the missing lines (original 44-50).
43 DataType* d_in =
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
51 sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);
// Run full-tensor (0-dimensional result) argmax and argmin on the device.
53 gpu_out_max.device(sycl_device) = gpu_in.argmax();
54 gpu_out_min.device(sycl_device) = gpu_in.argmin();
// Copy both scalar results back to the host; the VERIFY checks against the
// planted extremum are in the missing lines (original 58-61).
56 sycl_device.memcpyDeviceToHost(out_max.
data(), d_out_max, out_bytes);
57 sycl_device.memcpyDeviceToHost(out_min.
data(), d_out_min, out_bytes);
// Release all device buffers allocated by this test.
62 sycl_device.deallocate(d_in);
63 sycl_device.deallocate(d_out_max);
64 sycl_device.deallocate(d_out_min);
// NOTE(review): fragment of the per-dimension argmax test. The signature and
// setup (original lines 68-74: function name — presumably test_sycl_argmax_dim
// — 4-D tensor of sizeDim0..3, the `dim` loop, out_shape declaration) are
// missing from this extraction; TODO confirm against the full file.
67 template <
typename DataType,
int DataLayout,
typename DenseIndex>
// Collect the four input dimensions so the reduced output shape can be
// derived below.
75 std::vector<DenseIndex> dims;
76 dims.push_back(sizeDim0);
77 dims.push_back(sizeDim1);
78 dims.push_back(sizeDim2);
79 dims.push_back(sizeDim3);
// Output shape = input shape with dimension `dim` removed (argmax along a
// dimension reduces rank by one): copy dims[d] for d < dim, dims[d+1] after.
82 for (
DenseIndex d = 0;
d < 3; ++
d) out_shape[
d] = (
d < dim) ? dims[
d] : dims[
d + 1];
// Phase 1: plant the maximum (10.0) at index 0 of the reduced dimension, so
// every expected argmax value is 0. (The ix loop header is in missing lines.)
97 tensor(ix) = (ix[dim] != 0) ? -1.0 : 10.0;
// Device input buffer; d_out and the gpu_in/gpu_out TensorMaps are declared
// in the missing lines (original 107-112).
106 DataType* d_in =
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
113 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
114 gpu_out.device(sycl_device) = gpu_in.argmax(dim);
115 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Sanity-check: the result has one element per "column" along `dim`, i.e.
// total size / dimension(dim). (VERIFY macro itself is in a missing line.)
118 size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.
dimension(dim)));
// Ensure phase-1 checks observe completed device work before reusing buffers.
125 sycl_device.synchronize();
// Phase 2: plant the maximum (20.0) at the LAST index of `dim`, so every
// expected argmax value is dimension(dim) - 1.
136 tensor(ix) = (ix[dim] != tensor.
dimension(dim) - 1) ? -1.0 : 20.0;
142 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
143 gpu_out.device(sycl_device) = gpu_in.argmax(dim);
144 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Release both device buffers (verification loop is in missing lines 145-149).
150 sycl_device.deallocate(d_in);
151 sycl_device.deallocate(d_out);
// NOTE(review): fragment of the per-dimension argmin test — mirror image of
// the argmax-dim test above (minimum planted via -10.0 / -20.0 against a 1.0
// background, reduced with argmin). Signature and setup (original lines
// 156-162) are missing from this extraction; TODO confirm against full file.
155 template <
typename DataType,
int DataLayout,
typename DenseIndex>
// Collect the four input dimensions for deriving the reduced output shape.
163 std::vector<DenseIndex> dims;
164 dims.push_back(sizeDim0);
165 dims.push_back(sizeDim1);
166 dims.push_back(sizeDim2);
167 dims.push_back(sizeDim3);
// Output shape = input shape with dimension `dim` removed: dims[d] for
// d < dim, dims[d+1] afterwards.
170 for (
DenseIndex d = 0;
d < 3; ++
d) out_shape[
d] = (
d < dim) ? dims[
d] : dims[
d + 1];
// Phase 1: plant the minimum (-10.0) at index 0 along `dim`, so every
// expected argmin value is 0.
184 tensor(ix) = (ix[dim] != 0) ? 1.0 : -10.0;
// Device input buffer; d_out and the TensorMaps are in missing lines.
193 DataType* d_in =
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
200 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
201 gpu_out.device(sycl_device) = gpu_in.argmin(dim);
202 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Result size check: one argmin index per slice along `dim`.
205 size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.
dimension(dim)));
// Synchronize before phase 2 reuses the device buffers.
212 sycl_device.synchronize();
// Phase 2: plant the minimum (-20.0) at the LAST index of `dim`, expected
// argmin value is dimension(dim) - 1 everywhere.
223 tensor(ix) = (ix[dim] != tensor.
dimension(dim) - 1) ? 1.0 : -20.0;
229 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
230 gpu_out.device(sycl_device) = gpu_in.argmin(dim);
231 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Release device buffers (phase-2 verification loop is in missing lines).
237 sycl_device.deallocate(d_in);
238 sycl_device.deallocate(d_out);
// NOTE(review): per-device driver — presumably sycl_argmax_test_per_device.
// The function signature line (original 243) taking the device selector `d`
// is missing from this extraction; TODO confirm against the full file.
242 template <
typename DataType,
typename Device_Selector>
// Build a SYCL queue for the selected device and wrap it in an Eigen device.
244 QueueInterface queueInterface(
d);
245 auto sycl_device = Eigen::SyclDevice(&queueInterface);
// Run every test in both layouts with 64-bit indices (matches the
// EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t define at the top of the file).
246 test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
247 test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
248 test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
249 test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
250 test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
251 test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
// NOTE(review): fragment of the test entry point (the EIGEN_DECLARE_TEST /
// CALL_SUBTEST wrapper, per the usual Eigen test layout — its opening line and
// closing braces are outside this chunk; TODO confirm). Runs the float
// instantiation on every SYCL device Eigen can enumerate.
255 for (
const auto& device : Eigen::get_sycl_supported_devices()) {
256 CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));