cxx11_tensor_custom_op_sycl.cpp
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16 
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19 
20 #include "main.h"
21 #include <unsupported/Eigen/CXX11/Tensor>
22 
23 using Eigen::Tensor;
24 template<typename TensorType>
25 struct InsertZeros {
26  DSizes<DenseIndex, 2> dimensions(const TensorType& input) const {
28  result[0] = input.dimension(0) * 2;
29  result[1] = input.dimension(1) * 2;
30  return result;
31  }
32 
33  template <typename Output, typename Device>
34  void eval(const TensorType& input, Output& output, const Device& device) const
35  {
37  strides[0] = 2;
38  strides[1] = 2;
39  output.stride(strides).device(device) = input;
40 
42  Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
43  output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
44  }
45 };
46 
47 template<typename DataType, int DataLayout, typename IndexType>
48 static void test_custom_unary_op_sycl(const Eigen::SyclDevice &sycl_device)
49 {
50  IndexType sizeDim1 = 3;
51  IndexType sizeDim2 = 5;
52  Eigen::array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
53  Eigen::array<IndexType, 2> tensorResultRange = {{6, 10}};
54 
57 
58  DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
59  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
60 
62  TensorType gpu_in1(gpu_in1_data, tensorRange);
63  TensorType gpu_out(gpu_out_data, tensorResultRange);
64 
65  in1.setRandom();
66  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
67  gpu_out.device(sycl_device) = gpu_in1.customOp(InsertZeros<TensorType>());
68  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
69 
70  VERIFY_IS_EQUAL(out.dimension(0), 6);
71  VERIFY_IS_EQUAL(out.dimension(1), 10);
72 
73  for (int i = 0; i < 6; i+=2) {
74  for (int j = 0; j < 10; j+=2) {
75  VERIFY_IS_EQUAL(out(i, j), in1(i/2, j/2));
76  }
77  }
78  for (int i = 1; i < 6; i+=2) {
79  for (int j = 1; j < 10; j+=2) {
80  VERIFY_IS_EQUAL(out(i, j), 0);
81  }
82  }
83  sycl_device.deallocate(gpu_in1_data);
84 sycl_device.deallocate(gpu_out_data);
85 }
86 
87 template<typename TensorType>
88 struct BatchMatMul {
89  DSizes<DenseIndex, 3> dimensions(const TensorType& input1, const TensorType& input2) const {
91  result[0] = input1.dimension(0);
92  result[1] = input2.dimension(1);
93  result[2] = input2.dimension(2);
94  return result;
95  }
96 
97  template <typename Output, typename Device>
98  void eval(const TensorType& input1, const TensorType& input2,
99  Output& output, const Device& device) const
100  {
101  typedef typename TensorType::DimensionPair DimPair;
102  array<DimPair, 1> dims;
103  dims[0] = DimPair(1, 0);
104  for (int64_t i = 0; i < output.dimension(2); ++i) {
105  output.template chip<2>(i).device(device) = input1.template chip<2>(i).contract(input2.template chip<2>(i), dims);
106  }
107  }
108 };
109 
110 template<typename DataType, int DataLayout, typename IndexType>
111 static void test_custom_binary_op_sycl(const Eigen::SyclDevice &sycl_device)
112 {
113 
114  Eigen::array<IndexType, 3> tensorRange1 = {{2, 3, 5}};
115  Eigen::array<IndexType, 3> tensorRange2 = {{3,7,5}};
116  Eigen::array<IndexType, 3> tensorResultRange = {{2, 7, 5}};
117 
121 
122  DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
123  DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType)));
124  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
125 
127  TensorType gpu_in1(gpu_in1_data, tensorRange1);
128  TensorType gpu_in2(gpu_in2_data, tensorRange2);
129  TensorType gpu_out(gpu_out_data, tensorResultRange);
130 
131  in1.setRandom();
132  in2.setRandom();
133 
134  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
135  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(DataType));
136 
137  gpu_out.device(sycl_device) = gpu_in1.customOp(gpu_in2, BatchMatMul<TensorType>());
138  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
139 
140  for (IndexType i = 0; i < 5; ++i) {
142  array<DimPair, 1> dims;
143  dims[0] = DimPair(1, 0);
144  Eigen::Tensor<DataType, 2, DataLayout, IndexType> reference = in1.template chip<2>(i).contract(in2.template chip<2>(i), dims);
146  for (IndexType j = 0; j < 2; ++j) {
147  for (IndexType k = 0; k < 7; ++k) {
148  VERIFY_IS_APPROX(val(j, k), reference(j, k));
149  }
150  }
151  }
152  sycl_device.deallocate(gpu_in1_data);
153  sycl_device.deallocate(gpu_in2_data);
154  sycl_device.deallocate(gpu_out_data);
155 }
156 
157 template <typename DataType, typename Dev_selector> void custom_op_perDevice(Dev_selector s){
158  QueueInterface queueInterface(s);
159  auto sycl_device = Eigen::SyclDevice(&queueInterface);
160  test_custom_unary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
161  test_custom_unary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
162  test_custom_binary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
163  test_custom_binary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
164 
165 }
166 EIGEN_DECLARE_TEST(cxx11_tensor_custom_op_sycl) {
167  for (const auto& device :Eigen::get_sycl_supported_devices()) {
168  CALL_SUBTEST(custom_op_perDevice<float>(device));
169  }
170 }
Eigen::Tensor
The tensor class.
Definition: Tensor.h:63
Eigen::TensorRef
A reference to a tensor expression The expression will be evaluated lazily (as much as possible).
Definition: TensorForwardDeclarations.h:55
Eigen::internal::strides
EIGEN_ALWAYS_INLINE DSizes< IndexType, NumDims > strides(const DSizes< IndexType, NumDims > &dimensions)
Definition: TensorBlock.h:26
s
RealScalar s
Definition: level1_cplx_impl.h:126
Eigen::array
Definition: EmulateArray.h:21
VERIFY_IS_EQUAL
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:386
InsertZeros::dimensions
DSizes< DenseIndex, 2 > dimensions(const TensorType &input) const
Definition: cxx11_tensor_custom_op_sycl.cpp:26
BatchMatMul::dimensions
DSizes< DenseIndex, 3 > dimensions(const TensorType &input1, const TensorType &input2) const
Definition: cxx11_tensor_custom_op_sycl.cpp:89
result
Values result
Definition: OdometryOptimize.cpp:8
Eigen::DSizes
Definition: TensorDimensions.h:263
EIGEN_DECLARE_TEST
EIGEN_DECLARE_TEST(cxx11_tensor_custom_op_sycl)
Definition: cxx11_tensor_custom_op_sycl.cpp:166
test_custom_binary_op_sycl
static void test_custom_binary_op_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_custom_op_sycl.cpp:111
custom_op_perDevice
void custom_op_perDevice(Dev_selector s)
Definition: cxx11_tensor_custom_op_sycl.cpp:157
j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
int64_t
signed __int64 int64_t
Definition: ms_stdint.h:94
Eigen::Tensor::dimensions
EIGEN_DEVICE_FUNC const EIGEN_STRONG_INLINE Dimensions & dimensions() const
Definition: Tensor.h:102
BatchMatMul::eval
void eval(const TensorType &input1, const TensorType &input2, Output &output, const Device &device) const
Definition: cxx11_tensor_custom_op_sycl.cpp:98
test_custom_unary_op_sycl
static void test_custom_unary_op_sycl(const Eigen::SyclDevice &sycl_device)
Definition: cxx11_tensor_custom_op_sycl.cpp:48
return_value_policy::reference
@ reference
Eigen::DSizes::TotalSize
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const
Definition: TensorDimensions.h:271
Eigen::TensorMap
A tensor expression mapping an existing array of data.
Definition: TensorForwardDeclarations.h:52
out
std::ofstream out("Result.txt")
tree::f
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Definition: testExpression.cpp:218
VERIFY_IS_APPROX
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:15
Eigen::TensorBase< Tensor< Scalar_, NumIndices_, Options_, IndexType_ > >::setRandom
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor< Scalar_, NumIndices_, Options_, IndexType_ > & setRandom()
Definition: TensorBase.h:996
InsertZeros
Definition: cxx11_tensor_custom_op.cpp:17
main.h
offsets
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set offsets
Definition: gnuplot_common_settings.hh:27
BatchMatMul
Definition: cxx11_tensor_custom_op.cpp:61
Eigen::Tensor::data
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:104
DimPair
Tensor< float, 1 >::DimensionPair DimPair
Definition: cxx11_tensor_contraction.cpp:17
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
InsertZeros::eval
void eval(const TensorType &input, Output &output, const Device &device) const
Definition: cxx11_tensor_custom_op_sycl.cpp:34
CALL_SUBTEST
#define CALL_SUBTEST(FUNC)
Definition: main.h:399


gtsam
Author(s):
autogenerated on Wed Jan 1 2025 04:01:23