23 #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 24 #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 27 namespace TensorSycl {
/// Submits a command group to `dev.m_queue` that launches a one-dimensional
/// `parallel_for` kernel in which each in-range work-item calls `evalScalar`
/// with its own global linear id.
///
/// NOTE(review): this listing is missing several lines of the function. The
/// declarations of `evaluator`, `functors`, `GRange`, `PlaceHolderExpr`,
/// `DevExpr` and `device_evaluator`, and the closing braces of the lambdas and
/// of the function, are not visible here. Comments below that depend on them
/// are marked as assumptions.
///
/// \tparam Expr type of the expression passed in `expr`.
/// \tparam Dev  device type; the visible code only requires a public
///              `m_queue` member offering `get_device()`, `submit()` and
///              `throw_asynchronous()`.
/// \param expr  the expression to run (its use is on lines not shown here).
/// \param dev   the device whose queue receives the command group.
32 template <
typename Expr,
typename Dev>
33 void run(Expr &expr, Dev &dev) {
// Start from half of the device's reported maximum work-group size.
40 size_t tileSize =dev.m_queue.get_device().
template get_info<cl::sycl::info::device::max_work_group_size>()/2;
// The command-group lambda captures by reference, so the adjustment of
// `tileSize` made inside it writes to the local variable declared above.
41 dev.m_queue.submit([&](cl::sycl::handler &cgh) {
// Build the accessors for `evaluator` against this command-group handler.
44 auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
// `range` is the extent of dimension 0 of the first accessor in the tuple;
// it is the upper bound used by the in-kernel guard further down.
45 const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
// Make the global size and the work-group size compatible:
//  - if the work-group size exceeds the global size, shrink it to match;
//  - otherwise round the global size up to the next multiple of the
//    work-group size.
// (`GRange` is declared on a line missing from this listing — presumably
// initialised from `range`; TODO confirm.)
// NOTE(review): if `GRange` is 0 the first branch sets `tileSize` to 0 as
// well — confirm callers never reach this with an empty range.
47 if (tileSize>GRange) tileSize=GRange;
48 else if(GRange>tileSize){
49 size_t xMode = GRange % tileSize;
50 if (xMode != 0) GRange += (tileSize - xMode);
// Launch over an nd_range with global size `GRange` and local size
// `tileSize`. The kernel lambda captures by value.
53 cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
// Build the device-side expression from the captured functors and accessors.
55 auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
// `GRange` may have been rounded up past `range`, so work-items whose id is
// not below `range` skip the evaluation.
57 if (itemID.get_global_linear_id() < range) {
// The linear id is narrowed to `int` for `evalScalar`.
// NOTE(review): ids above INT_MAX would not survive this cast — confirm the
// supported range.
58 device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
// Called on the queue after the submit — presumably surfaces asynchronous
// errors raised while the command group executed; TODO confirm against the
// SYCL specification.
62 dev.m_queue.throw_asynchronous();
70 #endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
A cost model used to limit the number of threads used for evaluating a tensor expression.
PlaceHolderExpression< Expr, TotalLeaves-1 >::Type Type
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType *dest)
auto extractFunctors(const Evaluator &evaluator) -> FunctorExtractor< Evaluator >
template deduction function for FunctorExtractor
void run(Expr &expr, Dev &dev)
This struct is used to convert the MakePointer in the host expression to the MakeGlobalPointer for th...