// Preamble and visible body fragments of the templated test helper
// test_sycl_cumsum, which evaluates a cumulative sum through a SYCL device
// and compares it with the same expression evaluated on the host tensor.
// NOTE(review): the integers that open many lines look like line numbers from
// the original file that were fused into the text, and the gaps between them
// (e.g. 31 -> 41, 63 -> 71) mean statements are missing from this listing.
// The comments below describe only what the visible lines show.
//
// The first line defines EIGEN_TEST_NO_LONGDOUBLE, EIGEN_TEST_NO_COMPLEX,
// EIGEN_DEFAULT_DENSE_INDEX_TYPE (as int64_t) and EIGEN_USE_SYCL, includes the
// unsupported Tensor header, and opens the template parameter list.
14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 16 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 17 #define EIGEN_USE_SYCL 20 #include <unsupported/Eigen/CXX11/Tensor> 25 template <
typename DataType,
int DataLayout,
typename IndexType>
// Middle of the parameter list; the line holding the function name and the
// first parameters is missing here.
27 IndexType k_size, IndexType n_size,
int consume_dim,
// Log the three problem sizes and the dimension being scanned.
30 std::cout <<
"Testing for (" << m_size <<
"," << k_size <<
"," << n_size
31 <<
" consume_dim : " << consume_dim <<
")" << std::endl;
// Allocate one device buffer for the input and one for the result; the
// definitions of t_input_bytes / t_result_bytes are not visible here.
41 DataType* gpu_data_in =
42 static_cast<DataType*
>(sycl_device.allocate(t_input_bytes));
43 DataType* gpu_data_out =
44 static_cast<DataType*
>(sycl_device.allocate(t_result_bytes));
// Tails of two declarations built from the device pointers and tensorRange;
// their opening lines are missing. Presumably these declare the gpu_t_input /
// gpu_t_result objects used below -- confirm against the original file.
48 gpu_data_in, tensorRange);
50 gpu_data_out, tensorRange);
// Upload the host input tensor into the device input buffer.
51 sycl_device.memcpyHostToDevice(gpu_data_in, t_input.
data(), t_input_bytes);
// The same host input is also copied into the device output buffer, again
// sized by t_input_bytes.
// NOTE(review): that buffer was allocated with t_result_bytes; neither size's
// definition is visible here, so confirm the two are equal.
52 sycl_device.memcpyHostToDevice(gpu_data_out, t_input.
data(), t_input_bytes);
// Evaluate the cumulative sum along consume_dim through sycl_device, passing
// the exclusive flag, and store it in gpu_t_result.
54 gpu_t_result.
device(sycl_device) = gpu_t_input.cumsum(consume_dim, exclusive);
// Evaluate the same expression directly on t_input (no .device() call) to
// obtain the reference that the SYCL result is compared against.
56 t_result = t_input.cumsum(consume_dim, exclusive);
// Copy the device output into t_result_gpu (the byte-count argument sits on a
// line missing from this listing), then call synchronize() on the device.
58 sycl_device.memcpyDeviceToHost(t_result_gpu.
data(), gpu_data_out,
60 sycl_device.synchronize();
// Walk every element of the reference result.
62 for (IndexType
i = 0;
i < t_result.
size();
i++) {
// Start of the per-element comparison built on std::fabs; the rest of the
// condition and any follow-up checks are missing from this listing.
63 if (static_cast<DataType>(
std::fabs(static_cast<DataType>(
// Print the mismatch report: the index, the host (CPU) value and the SYCL value.
71 std::cout <<
"mismatch detected at index " <<
i <<
" CPU : " << t_result(
i)
72 <<
" vs SYCL : " << t_result_gpu(
i) << std::endl;
// Release both device buffers allocated above.
75 sycl_device.deallocate(gpu_data_in);
76 sycl_device.deallocate(gpu_data_out);
// Wrapper templated on DataType and Dev that runs test_sycl_cumsum twice,
// once with ColMajor and once with RowMajor layout, using int64_t indices and
// m_size = 2049, k_size = 1023, n_size = 127, consume_dim = 0.
// NOTE(review): the line carrying this function's name and the continuation
// lines holding the last (exclusive) argument of each call are missing from
// this listing. Presumably this is sycl_scan_test_exclusive_dim0_per_device --
// confirm against the original file.
79 template <
typename DataType,
typename Dev>
81 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
83 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
// Wrapper templated on DataType and Dev that runs test_sycl_cumsum twice,
// once with ColMajor and once with RowMajor layout, using int64_t indices and
// m_size = 1023, k_size = 2049, n_size = 127, consume_dim = 1.
// NOTE(review): the line carrying this function's name and the continuation
// lines holding the last (exclusive) argument of each call are missing from
// this listing. Presumably this is sycl_scan_test_exclusive_dim1_per_device --
// confirm against the original file.
86 template <
typename DataType,
typename Dev>
88 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
90 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
// Wrapper templated on DataType and Dev that runs test_sycl_cumsum twice,
// once with ColMajor and once with RowMajor layout, using int64_t indices and
// m_size = 1023, k_size = 127, n_size = 2049, consume_dim = 2.
// NOTE(review): the line carrying this function's name and the continuation
// lines holding the last (exclusive) argument of each call are missing from
// this listing. Presumably this is sycl_scan_test_exclusive_dim2_per_device --
// confirm against the original file.
93 template <
typename DataType,
typename Dev>
95 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
97 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
// Wrapper templated on DataType and Dev that runs test_sycl_cumsum twice,
// once with ColMajor and once with RowMajor layout, using int64_t indices and
// m_size = 2049, k_size = 1023, n_size = 127, consume_dim = 0.
// NOTE(review): the line carrying this function's name and the continuation
// lines holding the last (exclusive) argument of each call are missing from
// this listing. Presumably this is sycl_scan_test_inclusive_dim0_per_device --
// confirm against the original file.
100 template <
typename DataType,
typename Dev>
102 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
104 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
// Wrapper templated on DataType and Dev that runs test_sycl_cumsum twice,
// once with ColMajor and once with RowMajor layout, using int64_t indices and
// m_size = 1023, k_size = 2049, n_size = 127, consume_dim = 1.
// NOTE(review): the line carrying this function's name and the continuation
// lines holding the last (exclusive) argument of each call are missing from
// this listing. Presumably this is sycl_scan_test_inclusive_dim1_per_device --
// confirm against the original file.
107 template <
typename DataType,
typename Dev>
109 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
111 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
// Wrapper templated on DataType and Dev that runs test_sycl_cumsum twice,
// once with ColMajor and once with RowMajor layout, using int64_t indices and
// m_size = 1023, k_size = 127, n_size = 2049, consume_dim = 2.
// NOTE(review): the line carrying this function's name and the continuation
// lines holding the last (exclusive) argument of each call are missing from
// this listing. Presumably this is sycl_scan_test_inclusive_dim2_per_device --
// confirm against the original file.
114 template <
typename DataType,
typename Dev>
116 test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
118 test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
// Test driver body: for every device returned by
// Eigen::get_sycl_supported_devices(), print its name, build a QueueInterface
// and an Eigen::SyclDevice on top of it, and run the six per-device scan tests
// (exclusive and inclusive, dimensions 0-2) with float data.
// NOTE(review): the enclosing function header and the opening half of each
// call line are missing from this listing (note the extra ')' closing every
// call). Presumably these are EIGEN_DECLARE_TEST(cxx11_tensor_scan_sycl) and
// CALL_SUBTEST_n(...) -- confirm against the original file.
122 for (
const auto& device : Eigen::get_sycl_supported_devices()) {
// Announce which device the following sub-tests run on; the end of this
// output statement is on a missing line.
123 std::cout <<
"Running on " 124 << device.template get_info<cl::sycl::info::device::name>()
// The SyclDevice is given a pointer to queueInterface, which is declared in
// this loop body.
126 QueueInterface queueInterface(device);
127 auto sycl_device = Eigen::SyclDevice(&queueInterface);
// Exclusive-scan tests for dimensions 0, 1 and 2.
129 sycl_scan_test_exclusive_dim0_per_device<float>(sycl_device));
131 sycl_scan_test_exclusive_dim1_per_device<float>(sycl_device));
133 sycl_scan_test_exclusive_dim2_per_device<float>(sycl_device));
// Inclusive-scan tests for dimensions 0, 1 and 2.
135 sycl_scan_test_inclusive_dim0_per_device<float>(sycl_device));
137 sycl_scan_test_inclusive_dim1_per_device<float>(sycl_device));
139 sycl_scan_test_inclusive_dim2_per_device<float>(sycl_device));
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
#define CALL_SUBTEST_6(FUNC)
#define CALL_SUBTEST_4(FUNC)
void sycl_scan_test_exclusive_dim1_per_device(const Dev &sycl_device)
#define CALL_SUBTEST_3(FUNC)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
EIGEN_DECLARE_TEST(cxx11_tensor_scan_sycl)
Tensor< float, 1 >::DimensionPair DimPair
void sycl_scan_test_inclusive_dim0_per_device(const Dev &sycl_device)
void sycl_scan_test_inclusive_dim1_per_device(const Dev &sycl_device)
#define CALL_SUBTEST_1(FUNC)
A tensor expression mapping an existing array of data.
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Array< double, 1, 3 > e(1./3., 0.5, 2.)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
void test_sycl_cumsum(const Eigen::SyclDevice &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size, int consume_dim, bool exclusive)
static const float error_threshold
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
#define CALL_SUBTEST_5(FUNC)
void sycl_scan_test_exclusive_dim2_per_device(const Dev &sycl_device)
void sycl_scan_test_exclusive_dim0_per_device(const Dev &sycl_device)
#define CALL_SUBTEST_2(FUNC)
EIGEN_DEVICE_FUNC bool isApprox(const Scalar &x, const Scalar &y, const typename NumTraits< Scalar >::Real &precision=NumTraits< Scalar >::dummy_precision())
void sycl_scan_test_inclusive_dim2_per_device(const Dev &sycl_device)