cxx11_tensor_scan_sycl.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
17 #define EIGEN_USE_SYCL
18 
19 #include "main.h"
20 #include <unsupported/Eigen/CXX11/Tensor>
21 
22 using Eigen::Tensor;
24 
25 template <typename DataType, int DataLayout, typename IndexType>
26 void test_sycl_cumsum(const Eigen::SyclDevice& sycl_device, IndexType m_size,
27  IndexType k_size, IndexType n_size, int consume_dim,
28  bool exclusive) {
29  static const DataType error_threshold = 1e-4f;
30  std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size
31  << " consume_dim : " << consume_dim << ")" << std::endl;
32  Tensor<DataType, 3, DataLayout, IndexType> t_input(m_size, k_size, n_size);
33  Tensor<DataType, 3, DataLayout, IndexType> t_result(m_size, k_size, n_size);
34  Tensor<DataType, 3, DataLayout, IndexType> t_result_gpu(m_size, k_size,
35  n_size);
36 
37  t_input.setRandom();
38  std::size_t t_input_bytes = t_input.size() * sizeof(DataType);
39  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
40 
41  DataType* gpu_data_in =
42  static_cast<DataType*>(sycl_device.allocate(t_input_bytes));
43  DataType* gpu_data_out =
44  static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
45 
46  array<IndexType, 3> tensorRange = {{m_size, k_size, n_size}};
48  gpu_data_in, tensorRange);
50  gpu_data_out, tensorRange);
51  sycl_device.memcpyHostToDevice(gpu_data_in, t_input.data(), t_input_bytes);
52  sycl_device.memcpyHostToDevice(gpu_data_out, t_input.data(), t_input_bytes);
53 
54  gpu_t_result.device(sycl_device) = gpu_t_input.cumsum(consume_dim, exclusive);
55 
56  t_result = t_input.cumsum(consume_dim, exclusive);
57 
58  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), gpu_data_out,
59  t_result_bytes);
60  sycl_device.synchronize();
61 
62  for (IndexType i = 0; i < t_result.size(); i++) {
63  if (static_cast<DataType>(std::fabs(static_cast<DataType>(
64  t_result(i) - t_result_gpu(i)))) < error_threshold) {
65  continue;
66  }
67  if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i),
68  error_threshold)) {
69  continue;
70  }
71  std::cout << "mismatch detected at index " << i << " CPU : " << t_result(i)
72  << " vs SYCL : " << t_result_gpu(i) << std::endl;
73  assert(false);
74  }
75  sycl_device.deallocate(gpu_data_in);
76  sycl_device.deallocate(gpu_data_out);
77 }
78 
79 template <typename DataType, typename Dev>
80 void sycl_scan_test_exclusive_dim0_per_device(const Dev& sycl_device) {
81  test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
82  true);
83  test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
84  true);
85 }
86 template <typename DataType, typename Dev>
87 void sycl_scan_test_exclusive_dim1_per_device(const Dev& sycl_device) {
88  test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
89  true);
90  test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
91  true);
92 }
93 template <typename DataType, typename Dev>
94 void sycl_scan_test_exclusive_dim2_per_device(const Dev& sycl_device) {
95  test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
96  true);
97  test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
98  true);
99 }
100 template <typename DataType, typename Dev>
101 void sycl_scan_test_inclusive_dim0_per_device(const Dev& sycl_device) {
102  test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
103  false);
104  test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 2049, 1023, 127, 0,
105  false);
106 }
107 template <typename DataType, typename Dev>
108 void sycl_scan_test_inclusive_dim1_per_device(const Dev& sycl_device) {
109  test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
110  false);
111  test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 2049, 127, 1,
112  false);
113 }
114 template <typename DataType, typename Dev>
115 void sycl_scan_test_inclusive_dim2_per_device(const Dev& sycl_device) {
116  test_sycl_cumsum<DataType, ColMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
117  false);
118  test_sycl_cumsum<DataType, RowMajor, int64_t>(sycl_device, 1023, 127, 2049, 2,
119  false);
120 }
121 EIGEN_DECLARE_TEST(cxx11_tensor_scan_sycl) {
122  for (const auto& device : Eigen::get_sycl_supported_devices()) {
123  std::cout << "Running on "
124  << device.template get_info<cl::sycl::info::device::name>()
125  << std::endl;
126  QueueInterface queueInterface(device);
127  auto sycl_device = Eigen::SyclDevice(&queueInterface);
129  sycl_scan_test_exclusive_dim0_per_device<float>(sycl_device));
131  sycl_scan_test_exclusive_dim1_per_device<float>(sycl_device));
133  sycl_scan_test_exclusive_dim2_per_device<float>(sycl_device));
135  sycl_scan_test_inclusive_dim0_per_device<float>(sycl_device));
137  sycl_scan_test_inclusive_dim1_per_device<float>(sycl_device));
139  sycl_scan_test_inclusive_dim2_per_device<float>(sycl_device));
140  }
141 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:103
#define CALL_SUBTEST_6(FUNC)
#define CALL_SUBTEST_4(FUNC)
void sycl_scan_test_exclusive_dim1_per_device(const Dev &sycl_device)
#define CALL_SUBTEST_3(FUNC)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
Definition: TensorBase.h:996
EIGEN_DECLARE_TEST(cxx11_tensor_scan_sycl)
Tensor< float, 1 >::DimensionPair DimPair
Real fabs(const Real &a)
void sycl_scan_test_inclusive_dim0_per_device(const Dev &sycl_device)
void sycl_scan_test_inclusive_dim1_per_device(const Dev &sycl_device)
#define CALL_SUBTEST_1(FUNC)
A tensor expression mapping an existing array of data.
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Array< double, 1, 3 > e(1./3., 0.5, 2.)
TensorDevice< TensorMap< PlainObjectType, Options_, MakePointer_ >, DeviceType > device(const DeviceType &dev)
Definition: TensorBase.h:1145
void test_sycl_cumsum(const Eigen::SyclDevice &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size, int consume_dim, bool exclusive)
static const float error_threshold
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:104
#define CALL_SUBTEST_5(FUNC)
void sycl_scan_test_exclusive_dim2_per_device(const Dev &sycl_device)
void sycl_scan_test_exclusive_dim0_per_device(const Dev &sycl_device)
#define CALL_SUBTEST_2(FUNC)
EIGEN_DEVICE_FUNC bool isApprox(const Scalar &x, const Scalar &y, const typename NumTraits< Scalar >::Real &precision=NumTraits< Scalar >::dummy_precision())
void sycl_scan_test_inclusive_dim2_per_device(const Dev &sycl_device)
The tensor class.
Definition: Tensor.h:63


gtsam
Author(s):
autogenerated on Tue Jul 4 2023 02:34:08