TensorSyclRun.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Mehdi Goli Codeplay Software Ltd.
5 // Ralph Potter Codeplay Software Ltd.
6 // Luke Iwanski Codeplay Software Ltd.
7 // Cummins Chris PhD student at The University of Edinburgh.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 
14 /*****************************************************************
15  * TensorSyclRun.h
16  *
17  * \brief:
18  * Schedule_kernel invokes a specialised version of the kernel struct. The
19  * specialisation is based on the data dimension in the SYCL buffer.
20  *
21 *****************************************************************/
22 
23 #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
24 #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
25 
26 namespace Eigen {
27 namespace TensorSycl {
32 template <typename Expr, typename Dev>
33 void run(Expr &expr, Dev &dev) {
34  Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
35  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
36  if (needs_assign) {
37  typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
38  auto functors = internal::extractFunctors(evaluator);
39 
40  size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
41  dev.m_queue.submit([&](cl::sycl::handler &cgh) {
42 
43  // create a tuple of accessors from Evaluator
44  auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
45  const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
46  size_t GRange=range;
47  if (tileSize>GRange) tileSize=GRange;
48  else if(GRange>tileSize){
49  size_t xMode = GRange % tileSize;
50  if (xMode != 0) GRange += (tileSize - xMode);
51  }
52  // run the kernel
53  cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
54  typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
55  auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
56  auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
57  if (itemID.get_global_linear_id() < range) {
58  device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
59  }
60  });
61  });
62  dev.m_queue.throw_asynchronous();
63  }
64 
65  evaluator.cleanup();
66 }
67 } // namespace TensorSycl
68 } // namespace Eigen
69 
70 #endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
Definition: LDLT.h:16
A cost model used to limit the number of threads used for evaluating tensor expression.
PlaceHolderExpression< Expr, TotalLeaves-1 >::Type Type
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType *dest)
auto extractFunctors(const Evaluator &evaluator) -> FunctorExtractor< Evaluator >
template deduction function for FunctorExtractor
void run(Expr &expr, Dev &dev)
Definition: TensorSyclRun.h:33
This struct is used to convert the MakePointer in the host expression to the MakeGlobalPointer for th...


hebiros
Author(s): Xavier Artache , Matthew Tesch
autogenerated on Thu Sep 3 2020 04:09:38