21 #ifndef EIGEN_PACKET_MATH_SYCL_H 22 #define EIGEN_PACKET_MATH_SYCL_H 23 #include <type_traits> 27 #ifdef SYCL_DEVICE_ONLY 29 #define SYCL_PLOADT_RO(address_space_target) \ 30 template <typename packet_type, int Alignment> \ 31 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \ 32 typename cl::sycl::multi_ptr< \ 33 const typename unpacket_traits<packet_type>::type, \ 34 cl::sycl::access::address_space::address_space_target>::pointer_t \ 36 typedef typename unpacket_traits<packet_type>::type scalar; \ 37 typedef cl::sycl::multi_ptr< \ 38 scalar, cl::sycl::access::address_space::address_space_target> \ 40 auto res = packet_type( \ 41 static_cast<typename unpacket_traits<packet_type>::type>(0)); \ 42 res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \ 46 SYCL_PLOADT_RO(global_space)
47 SYCL_PLOADT_RO(local_space)
51 template <
typename packet_type,
int Alignment,
typename T>
53 ploadt_ro(
const Eigen::TensorSycl::internal::RangeAccess<
54 cl::sycl::access::mode::read_write,
T>& from) {
55 return ploadt_ro<packet_type, Alignment>(from.get_pointer());
58 #ifdef SYCL_DEVICE_ONLY 59 #define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \ 60 template <typename packet_type> \ 61 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ 62 typename cl::sycl::multi_ptr< \ 63 const typename unpacket_traits<packet_type>::type, \ 64 cl::sycl::access::address_space::address_space_target>::pointer_t \ 66 return ploadt_ro<packet_type, Alignment>(from); \ 79 #define SYCL_PLOAD(Alignment, AlignedType) \ 80 template <typename packet_type> \ 81 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ 82 const Eigen::TensorSycl::internal::RangeAccess< \ 83 cl::sycl::access::mode::read_write, \ 84 typename unpacket_traits<packet_type>::type> \ 86 return ploadt_ro<packet_type, Alignment>(from); \ 92 #ifdef SYCL_DEVICE_ONLY 95 #define SYCL_PLOADT(address_space_target) \ 96 template <typename packet_type, int Alignment> \ 97 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \ 98 typename cl::sycl::multi_ptr< \ 99 const typename unpacket_traits<packet_type>::type, \ 100 cl::sycl::access::address_space::address_space_target>::pointer_t \ 102 if (Alignment >= unpacket_traits<packet_type>::alignment) \ 103 return pload<packet_type>(from); \ 105 return ploadu<packet_type>(from); \ 109 SYCL_PLOADT(global_space)
111 SYCL_PLOADT(local_space)
115 template <
typename packet_type,
int Alignment>
117 ploadt(
const Eigen::TensorSycl::internal::RangeAccess<
118 cl::sycl::access::mode::read_write,
120 return ploadt<packet_type, Alignment>(from.get_pointer());
122 #ifdef SYCL_DEVICE_ONLY 125 #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \ 127 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \ 128 ploadt_ro<packet_type, Alignment>( \ 129 const typename unpacket_traits<packet_type>::type* from) { \ 130 typedef typename unpacket_traits<packet_type>::type scalar; \ 131 auto res = packet_type(static_cast<scalar>(0)); \ 132 res.template load<cl::sycl::access::address_space::private_space>( \ 133 0, const_cast<scalar*>(from)); \ 137 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4,
Aligned)
138 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2,
Aligned)
139 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4,
Unaligned)
140 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2,
Unaligned)
142 #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \ 144 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \ 145 const typename unpacket_traits<packet_type>::type* from) { \ 146 typedef typename unpacket_traits<packet_type>::type scalar; \ 147 auto res = packet_type(static_cast<scalar>(0)); \ 148 res.template load<cl::sycl::access::address_space::private_space>( \ 149 0, const_cast<scalar*>(from)); \ 152 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
153 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
154 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
155 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
157 #undef SYCL_PLOAD_SPECIAL 159 #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \ 161 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ 162 typename cl::sycl::multi_ptr< \ 164 cl::sycl::access::address_space::address_space_target>::pointer_t \ 166 const packet_type& from) { \ 167 typedef cl::sycl::multi_ptr< \ 168 scalar, cl::sycl::access::address_space::address_space_target> \ 170 from.store(0, multi_ptr(to)); \ 174 SYCL_PSTORE(
float, cl::sycl::cl_float4, global_space, )
175 SYCL_PSTORE(
float, cl::sycl::cl_float4, global_space, u)
176 SYCL_PSTORE(
double, cl::sycl::cl_double2, global_space, )
177 SYCL_PSTORE(
double, cl::sycl::cl_double2, global_space, u)
178 SYCL_PSTORE(
float, cl::sycl::cl_float4, local_space, )
179 SYCL_PSTORE(
float, cl::sycl::cl_float4, local_space, u)
180 SYCL_PSTORE(
double, cl::sycl::cl_double2, local_space, )
181 SYCL_PSTORE(
double, cl::sycl::cl_double2, local_space, u)
183 SYCL_PSTORE(
float, cl::sycl::cl_float4, private_space, )
184 SYCL_PSTORE(
float, cl::sycl::cl_float4, private_space, u)
185 SYCL_PSTORE(
double, cl::sycl::cl_double2, private_space, )
186 SYCL_PSTORE(
double, cl::sycl::cl_double2, private_space, u)
189 #define SYCL_PSTORE_T(address_space_target) \ 190 template <typename scalar, typename packet_type, int Alignment> \ 191 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \ 192 typename cl::sycl::multi_ptr< \ 194 cl::sycl::access::address_space::address_space_target>::pointer_t \ 196 const packet_type& from) { \ 203 SYCL_PSTORE_T(global_space)
205 SYCL_PSTORE_T(local_space)
209 #define SYCL_PSET1(packet_type) \ 211 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \ 212 const typename unpacket_traits<packet_type>::type& from) { \ 213 return packet_type(from); \ 217 SYCL_PSET1(cl::sycl::cl_float4)
218 SYCL_PSET1(cl::sycl::cl_double2)
222 template <
typename packet_type>
223 struct get_base_packet {
224 template <
typename sycl_multi_po
inter>
226 get_ploaddup(sycl_multi_pointer) {}
228 template <
typename sycl_multi_po
inter>
230 get_pgather(sycl_multi_pointer,
Index) {}
234 struct get_base_packet<cl::sycl::cl_float4> {
235 template <
typename sycl_multi_po
inter>
237 sycl_multi_pointer from) {
238 return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
240 template <
typename sycl_multi_po
inter>
242 sycl_multi_pointer from,
Index stride) {
243 return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
244 from[2 * stride], from[3 * stride]);
247 template <
typename sycl_multi_po
inter>
249 sycl_multi_pointer to,
const cl::sycl::cl_float4& from,
Index stride) {
253 to[tmp += stride] = from.z();
254 to[tmp += stride] = from.w();
258 return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
259 static_cast<float>(a + 2),
260 static_cast<float>(a + 3));
265 struct get_base_packet<cl::sycl::cl_double2> {
266 template <
typename sycl_multi_po
inter>
268 get_ploaddup(
const sycl_multi_pointer from) {
269 return cl::sycl::cl_double2(from[0], from[0]);
272 template <
typename sycl_multi_po
inter,
typename Index>
274 const sycl_multi_pointer from,
Index stride) {
275 return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
278 template <
typename sycl_multi_po
inter>
280 sycl_multi_pointer to,
const cl::sycl::cl_double2& from,
Index stride) {
282 to[stride] = from.y();
287 return cl::sycl::cl_double2(static_cast<double>(a),
288 static_cast<double>(a + 1));
292 #define SYCL_PLOAD_DUP(address_space_target) \ 293 template <typename packet_type> \ 294 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \ 295 typename cl::sycl::multi_ptr< \ 296 const typename unpacket_traits<packet_type>::type, \ 297 cl::sycl::access::address_space::address_space_target>::pointer_t \ 299 return get_base_packet<packet_type>::get_ploaddup(from); \ 303 SYCL_PLOAD_DUP(global_space)
305 SYCL_PLOAD_DUP(local_space)
306 #undef SYCL_PLOAD_DUP 308 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \ 310 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \ 311 const typename unpacket_traits<packet_type>::type* from) { \ 312 return get_base_packet<packet_type>::get_ploaddup(from); \ 315 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
316 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
318 #undef SYCL_PLOAD_DUP_SPECILIZE 320 #define SYCL_PLSET(packet_type) \ 322 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \ 323 const typename unpacket_traits<packet_type>::type& a) { \ 324 return get_base_packet<packet_type>::set_plset(a); \ 327 SYCL_PLSET(cl::sycl::cl_float4)
328 SYCL_PLSET(cl::sycl::cl_double2)
332 #define SYCL_PGATHER(address_space_target) \ 333 template <typename Scalar, typename packet_type> \ 334 EIGEN_DEVICE_FUNC inline packet_type pgather( \ 335 typename cl::sycl::multi_ptr< \ 336 const typename unpacket_traits<packet_type>::type, \ 337 cl::sycl::access::address_space::address_space_target>::pointer_t \ 340 return get_base_packet<packet_type>::get_pgather(from, stride); \ 344 SYCL_PGATHER(global_space)
346 SYCL_PGATHER(local_space)
350 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \ 352 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ 353 pgather<scalar, packet_type>( \ 354 const typename unpacket_traits<packet_type>::type* from, Index stride) { \ 355 return get_base_packet<packet_type>::get_pgather(from, stride); \ 358 SYCL_PGATHER_SPECILIZE(
float, cl::sycl::cl_float4)
359 SYCL_PGATHER_SPECILIZE(
double, cl::sycl::cl_double2)
361 #undef SYCL_PGATHER_SPECILIZE 363 #define SYCL_PSCATTER(address_space_target) \ 364 template <typename Scalar, typename packet_type> \ 365 EIGEN_DEVICE_FUNC inline void pscatter( \ 366 typename cl::sycl::multi_ptr< \ 367 typename unpacket_traits<packet_type>::type, \ 368 cl::sycl::access::address_space::address_space_target>::pointer_t \ 370 const packet_type& from, Index stride) { \ 371 get_base_packet<packet_type>::set_pscatter(to, from, stride); \ 375 SYCL_PSCATTER(global_space)
377 SYCL_PSCATTER(local_space)
381 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \ 383 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \ 384 typename unpacket_traits<packet_type>::type * to, \ 385 const packet_type& from, Index stride) { \ 386 get_base_packet<packet_type>::set_pscatter(to, from, stride); \ 389 SYCL_PSCATTER_SPECILIZE(
float, cl::sycl::cl_float4)
390 SYCL_PSCATTER_SPECILIZE(
double, cl::sycl::cl_double2)
392 #undef SYCL_PSCATTER_SPECILIZE 394 #define SYCL_PMAD(packet_type) \ 396 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \ 397 const packet_type& a, const packet_type& b, const packet_type& c) { \ 398 return cl::sycl::mad(a, b, c); \ 401 SYCL_PMAD(cl::sycl::cl_float4)
402 SYCL_PMAD(cl::sycl::cl_double2)
407 const cl::sycl::cl_float4&
a) {
412 const cl::sycl::cl_double2&
a) {
418 const cl::sycl::cl_float4&
a) {
419 return a.x() +
a.y() +
a.z() +
a.w();
424 const cl::sycl::cl_double2&
a) {
425 return a.x() +
a.y();
430 const cl::sycl::cl_float4&
a) {
436 const cl::sycl::cl_double2&
a) {
442 const cl::sycl::cl_float4&
a) {
448 const cl::sycl::cl_double2&
a) {
454 const cl::sycl::cl_float4&
a) {
455 return a.x() *
a.y() *
a.z() *
a.w();
459 const cl::sycl::cl_double2&
a) {
460 return a.x() *
a.y();
465 pabs<cl::sycl::cl_float4>(
const cl::sycl::cl_float4&
a) {
471 pabs<cl::sycl::cl_double2>(
const cl::sycl::cl_double2&
a) {
475 template <
typename Packet>
480 cl::sycl::rounding_mode::automatic>());
483 template <
typename Packet>
488 cl::sycl::rounding_mode::automatic>());
491 template <
typename Packet>
496 cl::sycl::rounding_mode::automatic>());
499 #define SYCL_PCMP(OP, TYPE) \ 501 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \ 503 return sycl_pcmp_##OP<TYPE>(a, b); \ 506 SYCL_PCMP(le, cl::sycl::cl_float4)
507 SYCL_PCMP(lt, cl::sycl::cl_float4)
508 SYCL_PCMP(eq, cl::sycl::cl_float4)
509 SYCL_PCMP(le, cl::sycl::cl_double2)
510 SYCL_PCMP(lt, cl::sycl::cl_double2)
511 SYCL_PCMP(eq, cl::sycl::cl_double2)
514 template <
typename T>
struct convert_to_integer;
516 template <>
struct convert_to_integer<float> {
518 using packet_type = cl::sycl::cl_int4;
520 template <>
struct convert_to_integer<double> {
522 using packet_type = cl::sycl::cl_long2;
525 template <
typename PacketIn>
528 vector_as_int(
const PacketIn &
p) {
530 p.template
convert<
typename convert_to_integer<
532 cl::sycl::rounding_mode::automatic>());
535 template <
typename packetOut,
typename PacketIn>
537 convert_vector(
const PacketIn &p) {
539 cl::sycl::rounding_mode::automatic>());
542 #define SYCL_PAND(TYPE) \ 544 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \ 546 return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \ 548 SYCL_PAND(cl::sycl::cl_float4)
549 SYCL_PAND(cl::sycl::cl_double2)
552 #define SYCL_POR(TYPE) \ 554 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \ 556 return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \ 559 SYCL_POR(cl::sycl::cl_float4)
560 SYCL_POR(cl::sycl::cl_double2)
563 #define SYCL_PXOR(TYPE) \ 565 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \ 567 return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \ 570 SYCL_PXOR(cl::sycl::cl_float4)
571 SYCL_PXOR(cl::sycl::cl_double2)
574 #define SYCL_PANDNOT(TYPE) \ 576 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \ 578 return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \ 580 SYCL_PANDNOT(cl::sycl::cl_float4)
581 SYCL_PANDNOT(cl::sycl::cl_double2)
586 float tmp = kernel.
packet[0].y();
588 kernel.
packet[1].x() = tmp;
590 tmp = kernel.
packet[0].z();
592 kernel.
packet[2].x() = tmp;
594 tmp = kernel.
packet[0].w();
596 kernel.
packet[3].x() = tmp;
598 tmp = kernel.
packet[1].z();
600 kernel.
packet[2].y() = tmp;
602 tmp = kernel.
packet[1].w();
604 kernel.
packet[3].y() = tmp;
606 tmp = kernel.
packet[2].w();
608 kernel.
packet[3].z() = tmp;
613 double tmp = kernel.
packet[0].y();
615 kernel.
packet[1].x() = tmp;
621 const cl::sycl::cl_float4& thenPacket,
622 const cl::sycl::cl_float4& elsePacket) {
623 cl::sycl::cl_int4 condition(
624 ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
625 ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
626 return cl::sycl::select(thenPacket, elsePacket, condition);
630 inline cl::sycl::cl_double2
pblend(
632 const cl::sycl::cl_double2& thenPacket,
633 const cl::sycl::cl_double2& elsePacket) {
634 cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
635 ifPacket.select[1] ? 0 : -1);
636 return cl::sycl::select(thenPacket, elsePacket, condition);
638 #endif // SYCL_DEVICE_ONLY 640 #define SYCL_PSTORE(alignment) \ 641 template <typename packet_type> \ 642 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ 643 const Eigen::TensorSycl::internal::RangeAccess< \ 644 cl::sycl::access::mode::read_write, \ 645 typename unpacket_traits<packet_type>::type>& to, \ 646 const packet_type& from) { \ 647 pstore##alignment(to.get_pointer(), from); \ 656 template <
typename scalar,
typename packet_type,
int Alignment>
658 Eigen::TensorSycl::internal::RangeAccess<
659 cl::sycl::access::mode::read_write,
662 const packet_type& from) {
663 pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
670 #endif // EIGEN_PACKET_MATH_SYCL_H #define EIGEN_ALWAYS_INLINE
#define EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16 &a, const bfloat16 &b)
Namespace containing all symbols from the Eigen library.
#define SYCL_PSTORE(alignment)
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16 &a, const bfloat16 &b)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, typename unpacket_traits< packet_type >::type > to, const packet_type &from)
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, T > &from)
#define EIGEN_DEVICE_FUNC
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(const Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, typename unpacket_traits< packet_type >::type > &from)
static BinaryMeasurement< Rot3 > convert(const BetweenFactor< Pose3 >::shared_ptr &f)
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
#define SYCL_PLOAD(Alignment, AlignedType)