14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 19 #define EIGEN_HAS_CONSTEXPR 1 23 #include <unsupported/Eigen/CXX11/Tensor> 26 using Eigen::SyclDevice;
// Fragment of a test that runs a full (no dimension argument) argmax and argmin
// on a SYCL device and copies the results back to the host.
// NOTE(review): this listing is an excerpt. The function signature, the tensor
// declarations (in, out_max, out_min, gpu_in, gpu_out_*), the out_bytes and
// d_out_* definitions and any assertions are not visible here. The template
// parameter list matches the test_sycl_simple_argmax<DataType, Layout, index>
// calls made elsewhere in this file - confirm against the full source.
30 template <
typename DataType,
int Layout,
typename DenseIndex>
// Multiply `in` by a constant-100 expression built from `in` itself.
36 in *= in.constant(100.0);
// Overwrite element (0, 0, 0) with -1000.0.
// NOTE(review): presumably pins where the minimum is found - the matching
// check is not visible in this excerpt.
37 in(0, 0, 0) = -1000.0;
// Input buffer size in bytes: element count times sizeof(DataType).
40 std::size_t in_bytes = in.size() *
sizeof(DataType);
// Allocate in_bytes through the SYCL device and use the result as DataType*.
43 DataType* d_in =
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
// Copy the host tensor data into the device allocation d_in.
51 sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);
// Evaluate argmax and argmin of gpu_in on the SYCL device, assigning into
// gpu_out_max and gpu_out_min respectively.
53 gpu_out_max.
device(sycl_device) = gpu_in.argmax();
54 gpu_out_min.
device(sycl_device) = gpu_in.argmin();
// Copy out_bytes of each result from the device buffers back to the host tensors.
56 sycl_device.memcpyDeviceToHost(out_max.
data(), d_out_max, out_bytes);
57 sycl_device.memcpyDeviceToHost(out_min.
data(), d_out_min, out_bytes);
// Release the three device allocations used by this test.
62 sycl_device.deallocate(d_in);
63 sycl_device.deallocate(d_out_max);
64 sycl_device.deallocate(d_out_min);
// Fragment of a test that computes argmax along a single dimension `dim` of a
// four-dimensional tensor on a SYCL device, for two different fill patterns.
// NOTE(review): this listing is an excerpt. The function signature, the
// definitions of sizeDim0..sizeDim3, dim, ix, out_shape, tensor, tensor_arg,
// gpu_in, gpu_out, d_out, in_bytes and out_bytes, the enclosing loops and the
// assertions are not visible here. The name test_sycl_argmax_dim is inferred
// from the argmax(dim) calls and the call sites elsewhere in this file.
67 template <
typename DataType,
int DataLayout,
typename DenseIndex>
// Collect the four dimension sizes in order; used below to derive the output shape.
75 std::vector<DenseIndex> dims;
76 dims.push_back(sizeDim0);
77 dims.push_back(sizeDim1);
78 dims.push_back(sizeDim2);
79 dims.push_back(sizeDim3);
// Build the 3-entry output shape by dropping entry `dim` from dims: entries
// before `dim` are copied as-is, entries after it are shifted down by one.
82 for (
DenseIndex d = 0;
d < 3; ++
d) out_shape[
d] = (
d < dim) ? dims[
d] : dims[
d + 1];
// First fill pattern: 10.0 where the index along `dim` is 0, -1.0 elsewhere.
// NOTE(review): presumably makes index 0 the expected argmax along `dim`.
97 tensor(ix) = (ix[dim] != 0) ? -1.0 : 10.0;
// Allocate in_bytes through the SYCL device and use the result as DataType*.
106 DataType* d_in =
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
// Upload the input, evaluate argmax along `dim` on the device, download the result.
113 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
114 gpu_out.
device(sycl_device) = gpu_in.argmax(dim);
115 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Element count expected for the result: product of all four sizes divided by
// the size of the reduced dimension.
// NOTE(review): the beginning of this statement is not visible in this excerpt.
118 size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.
dimension(dim)));
// NOTE(review): presumably waits for outstanding device work before the host
// tensor is refilled - confirm against the SyclDevice documentation.
125 sycl_device.synchronize();
// Second fill pattern: 20.0 at the last index along `dim`, -1.0 elsewhere.
136 tensor(ix) = (ix[dim] != tensor.
dimension(dim) - 1) ? -1.0 : 20.0;
// Repeat upload, argmax along `dim`, and download with the new pattern,
// reusing the same device buffers.
142 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
143 gpu_out.
device(sycl_device) = gpu_in.argmax(dim);
144 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Release the device allocations.
150 sycl_device.deallocate(d_in);
151 sycl_device.deallocate(d_out);
// Fragment of a test that computes argmin along a single dimension `dim` of a
// four-dimensional tensor on a SYCL device, for two different fill patterns.
// NOTE(review): this listing is an excerpt. The function signature, the
// definitions of sizeDim0..sizeDim3, dim, ix, out_shape, tensor, tensor_arg,
// gpu_in, gpu_out, d_out, in_bytes and out_bytes, the enclosing loops and the
// assertions are not visible here. The name test_sycl_argmin_dim is inferred
// from the argmin(dim) calls and the call sites elsewhere in this file.
155 template <
typename DataType,
int DataLayout,
typename DenseIndex>
// Collect the four dimension sizes in order; used below to derive the output shape.
163 std::vector<DenseIndex> dims;
164 dims.push_back(sizeDim0);
165 dims.push_back(sizeDim1);
166 dims.push_back(sizeDim2);
167 dims.push_back(sizeDim3);
// Build the 3-entry output shape by dropping entry `dim` from dims: entries
// before `dim` are copied as-is, entries after it are shifted down by one.
170 for (
DenseIndex d = 0;
d < 3; ++
d) out_shape[
d] = (
d < dim) ? dims[
d] : dims[
d + 1];
// First fill pattern: -10.0 where the index along `dim` is 0, 1.0 elsewhere.
// NOTE(review): presumably makes index 0 the expected argmin along `dim`.
184 tensor(ix) = (ix[dim] != 0) ? 1.0 : -10.0;
// Allocate in_bytes through the SYCL device and use the result as DataType*.
193 DataType* d_in =
static_cast<DataType*
>(sycl_device.allocate(in_bytes));
// Upload the input, evaluate argmin along `dim` on the device, download the result.
200 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
201 gpu_out.
device(sycl_device) = gpu_in.argmin(dim);
202 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Element count expected for the result: product of all four sizes divided by
// the size of the reduced dimension.
// NOTE(review): the beginning of this statement is not visible in this excerpt.
205 size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.
dimension(dim)));
// NOTE(review): presumably waits for outstanding device work before the host
// tensor is refilled - confirm against the SyclDevice documentation.
212 sycl_device.synchronize();
// Second fill pattern: -20.0 at the last index along `dim`, 1.0 elsewhere.
223 tensor(ix) = (ix[dim] != tensor.
dimension(dim) - 1) ? 1.0 : -20.0;
// Repeat upload, argmin along `dim`, and download with the new pattern,
// reusing the same device buffers.
229 sycl_device.memcpyHostToDevice(d_in, tensor.
data(), in_bytes);
230 gpu_out.
device(sycl_device) = gpu_in.argmin(dim);
231 sycl_device.memcpyDeviceToHost(tensor_arg.
data(), d_out, out_bytes);
// Release the device allocations.
237 sycl_device.deallocate(d_in);
238 sycl_device.deallocate(d_out);
// Per-device driver: builds a QueueInterface from the selector `d`, wraps it in
// an Eigen::SyclDevice, and runs the simple argmax test and the per-dimension
// argmax / argmin tests for both RowMajor and ColMajor layouts, always with
// int64_t as the index type.
// NOTE(review): the function signature and closing brace are not visible in
// this excerpt.
242 template <
typename DataType,
typename Device_Selector>
244 QueueInterface queueInterface(d);
// The device holds a pointer to queueInterface, which is a local of this function.
245 auto sycl_device = Eigen::SyclDevice(&queueInterface);
246 test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
247 test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
248 test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
249 test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
250 test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
251 test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
// For every device returned by Eigen::get_sycl_supported_devices(), run the
// float instantiation of sycl_argmax_test_per_device as a subtest.
// NOTE(review): the enclosing test declaration and the closing braces of this
// loop are not visible in this excerpt.
255 for (
const auto& device : Eigen::get_sycl_supported_devices()) {
256 CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
EIGEN_DECLARE_TEST(cxx11_tensor_argmax_sycl)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & setRandom()
static const Line3 l(Rot3(), 1, 1)
#define VERIFY_IS_EQUAL(a, b)
A tensor expression mapping an existing array of data.
void sycl_argmax_test_per_device(const Device_Selector &d)
static void test_sycl_argmin_dim(const Eigen::SyclDevice &sycl_device)
static void test_sycl_simple_argmax(const Eigen::SyclDevice &sycl_device)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
static void test_sycl_argmax_dim(const Eigen::SyclDevice &sycl_device)
#define CALL_SUBTEST(FUNC)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const