SYCL/PacketMath.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Mehdi Goli Codeplay Software Ltd.
5 // Ralph Potter Codeplay Software Ltd.
6 // Luke Iwanski Codeplay Software Ltd.
7 // Contact: <eigen@codeplay.com>
8 //
9 // This Source Code Form is subject to the terms of the Mozilla
10 // Public License v. 2.0. If a copy of the MPL was not distributed
11 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12 
13 /*****************************************************************
14  * PacketMath.h
15  *
16  * \brief:
17  * PacketMath
18  *
19  *****************************************************************/
20 
21 #ifndef EIGEN_PACKET_MATH_SYCL_H
22 #define EIGEN_PACKET_MATH_SYCL_H
23 #include <type_traits>
24 namespace Eigen {
25 
26 namespace internal {
27 #ifdef SYCL_DEVICE_ONLY
28 
29 #define SYCL_PLOADT_RO(address_space_target) \
30  template <typename packet_type, int Alignment> \
31  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
32  typename cl::sycl::multi_ptr< \
33  const typename unpacket_traits<packet_type>::type, \
34  cl::sycl::access::address_space::address_space_target>::pointer_t \
35  from) { \
36  typedef typename unpacket_traits<packet_type>::type scalar; \
37  typedef cl::sycl::multi_ptr< \
38  scalar, cl::sycl::access::address_space::address_space_target> \
39  multi_ptr; \
40  auto res = packet_type( \
41  static_cast<typename unpacket_traits<packet_type>::type>(0)); \
42  res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
43  return res; \
44  }
45 
46 SYCL_PLOADT_RO(global_space)
47 SYCL_PLOADT_RO(local_space)
48 #undef SYCL_PLOADT_RO
49 #endif
50 
51 template <typename packet_type, int Alignment, typename T>
53 ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
54  cl::sycl::access::mode::read_write, T>& from) {
55  return ploadt_ro<packet_type, Alignment>(from.get_pointer());
56 }
57 
58 #ifdef SYCL_DEVICE_ONLY
59 #define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \
60  template <typename packet_type> \
61  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
62  typename cl::sycl::multi_ptr< \
63  const typename unpacket_traits<packet_type>::type, \
64  cl::sycl::access::address_space::address_space_target>::pointer_t \
65  from) { \
66  return ploadt_ro<packet_type, Alignment>(from); \
67  }
68 
69 // global space
70 SYCL_PLOAD(global_space, Unaligned, u)
71 SYCL_PLOAD(global_space, Aligned, )
72 // local space
73 SYCL_PLOAD(local_space, Unaligned, u)
74 SYCL_PLOAD(local_space, Aligned, )
75 
76 #undef SYCL_PLOAD
77 #endif
78 
79 #define SYCL_PLOAD(Alignment, AlignedType) \
80  template <typename packet_type> \
81  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
82  const Eigen::TensorSycl::internal::RangeAccess< \
83  cl::sycl::access::mode::read_write, \
84  typename unpacket_traits<packet_type>::type> \
85  from) { \
86  return ploadt_ro<packet_type, Alignment>(from); \
87  }
90 #undef SYCL_PLOAD
91 
92 #ifdef SYCL_DEVICE_ONLY
93 
95 #define SYCL_PLOADT(address_space_target) \
96  template <typename packet_type, int Alignment> \
97  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \
98  typename cl::sycl::multi_ptr< \
99  const typename unpacket_traits<packet_type>::type, \
100  cl::sycl::access::address_space::address_space_target>::pointer_t \
101  from) { \
102  if (Alignment >= unpacket_traits<packet_type>::alignment) \
103  return pload<packet_type>(from); \
104  else \
105  return ploadu<packet_type>(from); \
106  }
107 
108 // global space
109 SYCL_PLOADT(global_space)
110 // local space
111 SYCL_PLOADT(local_space)
112 #undef SYCL_PLOADT
113 #endif
114 
115 template <typename packet_type, int Alignment>
117 ploadt(const Eigen::TensorSycl::internal::RangeAccess<
118  cl::sycl::access::mode::read_write,
119  typename unpacket_traits<packet_type>::type>& from) {
120  return ploadt<packet_type, Alignment>(from.get_pointer());
121 }
122 #ifdef SYCL_DEVICE_ONLY
123 
124 // private_space
125 #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \
126  template <> \
127  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
128  ploadt_ro<packet_type, Alignment>( \
129  const typename unpacket_traits<packet_type>::type* from) { \
130  typedef typename unpacket_traits<packet_type>::type scalar; \
131  auto res = packet_type(static_cast<scalar>(0)); \
132  res.template load<cl::sycl::access::address_space::private_space>( \
133  0, const_cast<scalar*>(from)); \
134  return res; \
135  }
136 
137 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
138 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
139 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
140 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
141 
142 #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \
143  template <> \
144  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
145  const typename unpacket_traits<packet_type>::type* from) { \
146  typedef typename unpacket_traits<packet_type>::type scalar; \
147  auto res = packet_type(static_cast<scalar>(0)); \
148  res.template load<cl::sycl::access::address_space::private_space>( \
149  0, const_cast<scalar*>(from)); \
150  return res; \
151  }
152 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
153 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
154 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
155 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
156 
157 #undef SYCL_PLOAD_SPECIAL
158 
159 #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
160  template <> \
161  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
162  typename cl::sycl::multi_ptr< \
163  scalar, \
164  cl::sycl::access::address_space::address_space_target>::pointer_t \
165  to, \
166  const packet_type& from) { \
167  typedef cl::sycl::multi_ptr< \
168  scalar, cl::sycl::access::address_space::address_space_target> \
169  multi_ptr; \
170  from.store(0, multi_ptr(to)); \
171  }
172 
173 // global space
174 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
175 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
176 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
177 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
178 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
179 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
180 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
181 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
182 
183 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
184 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
185 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
186 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
187 #undef SYCL_PSTORE
188 
189 #define SYCL_PSTORE_T(address_space_target) \
190  template <typename scalar, typename packet_type, int Alignment> \
191  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \
192  typename cl::sycl::multi_ptr< \
193  scalar, \
194  cl::sycl::access::address_space::address_space_target>::pointer_t \
195  to, \
196  const packet_type& from) { \
197  if (Alignment) \
198  pstore(to, from); \
199  else \
200  pstoreu(to, from); \
201  }
202 
203 SYCL_PSTORE_T(global_space)
204 
205 SYCL_PSTORE_T(local_space)
206 
207 #undef SYCL_PSTORE_T
208 
209 #define SYCL_PSET1(packet_type) \
210  template <> \
211  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
212  const typename unpacket_traits<packet_type>::type& from) { \
213  return packet_type(from); \
214  }
215 
216 // global space
217 SYCL_PSET1(cl::sycl::cl_float4)
218 SYCL_PSET1(cl::sycl::cl_double2)
219 
220 #undef SYCL_PSET1
221 
222 template <typename packet_type>
223 struct get_base_packet {
224  template <typename sycl_multi_pointer>
225  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
226  get_ploaddup(sycl_multi_pointer) {}
227 
228  template <typename sycl_multi_pointer>
229  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
230  get_pgather(sycl_multi_pointer, Index) {}
231 };
232 
233 template <>
234 struct get_base_packet<cl::sycl::cl_float4> {
235  template <typename sycl_multi_pointer>
236  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
237  sycl_multi_pointer from) {
238  return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
239  }
240  template <typename sycl_multi_pointer>
241  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
242  sycl_multi_pointer from, Index stride) {
243  return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
244  from[2 * stride], from[3 * stride]);
245  }
246 
247  template <typename sycl_multi_pointer>
248  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
249  sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
250  auto tmp = stride;
251  to[0] = from.x();
252  to[tmp] = from.y();
253  to[tmp += stride] = from.z();
254  to[tmp += stride] = from.w();
255  }
256  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
257  const float& a) {
258  return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
259  static_cast<float>(a + 2),
260  static_cast<float>(a + 3));
261  }
262 };
263 
264 template <>
265 struct get_base_packet<cl::sycl::cl_double2> {
266  template <typename sycl_multi_pointer>
267  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
268  get_ploaddup(const sycl_multi_pointer from) {
269  return cl::sycl::cl_double2(from[0], from[0]);
270  }
271 
272  template <typename sycl_multi_pointer, typename Index>
273  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
274  const sycl_multi_pointer from, Index stride) {
275  return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
276  }
277 
278  template <typename sycl_multi_pointer>
279  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
280  sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
281  to[0] = from.x();
282  to[stride] = from.y();
283  }
284 
285  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
286  const double& a) {
287  return cl::sycl::cl_double2(static_cast<double>(a),
288  static_cast<double>(a + 1));
289  }
290 };
291 
292 #define SYCL_PLOAD_DUP(address_space_target) \
293  template <typename packet_type> \
294  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
295  typename cl::sycl::multi_ptr< \
296  const typename unpacket_traits<packet_type>::type, \
297  cl::sycl::access::address_space::address_space_target>::pointer_t \
298  from) { \
299  return get_base_packet<packet_type>::get_ploaddup(from); \
300  }
301 
302 // global space
303 SYCL_PLOAD_DUP(global_space)
304 // local_space
305 SYCL_PLOAD_DUP(local_space)
306 #undef SYCL_PLOAD_DUP
307 
308 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
309  template <> \
310  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
311  const typename unpacket_traits<packet_type>::type* from) { \
312  return get_base_packet<packet_type>::get_ploaddup(from); \
313  }
314 
315 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
316 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
317 
318 #undef SYCL_PLOAD_DUP_SPECILIZE
319 
320 #define SYCL_PLSET(packet_type) \
321  template <> \
322  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
323  const typename unpacket_traits<packet_type>::type& a) { \
324  return get_base_packet<packet_type>::set_plset(a); \
325  }
326 
327 SYCL_PLSET(cl::sycl::cl_float4)
328 SYCL_PLSET(cl::sycl::cl_double2)
329 
330 #undef SYCL_PLSET
331 
332 #define SYCL_PGATHER(address_space_target) \
333  template <typename Scalar, typename packet_type> \
334  EIGEN_DEVICE_FUNC inline packet_type pgather( \
335  typename cl::sycl::multi_ptr< \
336  const typename unpacket_traits<packet_type>::type, \
337  cl::sycl::access::address_space::address_space_target>::pointer_t \
338  from, \
339  Index stride) { \
340  return get_base_packet<packet_type>::get_pgather(from, stride); \
341  }
342 
343 // global space
344 SYCL_PGATHER(global_space)
345 // local space
346 SYCL_PGATHER(local_space)
347 
348 #undef SYCL_PGATHER
349 
350 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
351  template <> \
352  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
353  pgather<scalar, packet_type>( \
354  const typename unpacket_traits<packet_type>::type* from, Index stride) { \
355  return get_base_packet<packet_type>::get_pgather(from, stride); \
356  }
357 
358 SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
359 SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
360 
361 #undef SYCL_PGATHER_SPECILIZE
362 
363 #define SYCL_PSCATTER(address_space_target) \
364  template <typename Scalar, typename packet_type> \
365  EIGEN_DEVICE_FUNC inline void pscatter( \
366  typename cl::sycl::multi_ptr< \
367  typename unpacket_traits<packet_type>::type, \
368  cl::sycl::access::address_space::address_space_target>::pointer_t \
369  to, \
370  const packet_type& from, Index stride) { \
371  get_base_packet<packet_type>::set_pscatter(to, from, stride); \
372  }
373 
374 // global space
375 SYCL_PSCATTER(global_space)
376 // local space
377 SYCL_PSCATTER(local_space)
378 
379 #undef SYCL_PSCATTER
380 
381 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
382  template <> \
383  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
384  typename unpacket_traits<packet_type>::type * to, \
385  const packet_type& from, Index stride) { \
386  get_base_packet<packet_type>::set_pscatter(to, from, stride); \
387  }
388 
389 SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
390 SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
391 
392 #undef SYCL_PSCATTER_SPECILIZE
393 
394 #define SYCL_PMAD(packet_type) \
395  template <> \
396  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
397  const packet_type& a, const packet_type& b, const packet_type& c) { \
398  return cl::sycl::mad(a, b, c); \
399  }
400 
401 SYCL_PMAD(cl::sycl::cl_float4)
402 SYCL_PMAD(cl::sycl::cl_double2)
403 #undef SYCL_PMAD
404 
405 template <>
406 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
407  const cl::sycl::cl_float4& a) {
408  return a.x();
409 }
410 template <>
411 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
412  const cl::sycl::cl_double2& a) {
413  return a.x();
414 }
415 
416 template <>
417 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
418  const cl::sycl::cl_float4& a) {
419  return a.x() + a.y() + a.z() + a.w();
420 }
421 
422 template <>
423 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
424  const cl::sycl::cl_double2& a) {
425  return a.x() + a.y();
426 }
427 
428 template <>
429 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
430  const cl::sycl::cl_float4& a) {
431  return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
432  cl::sycl::fmax(a.z(), a.w()));
433 }
434 template <>
435 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
436  const cl::sycl::cl_double2& a) {
437  return cl::sycl::fmax(a.x(), a.y());
438 }
439 
440 template <>
441 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
442  const cl::sycl::cl_float4& a) {
443  return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
444  cl::sycl::fmin(a.z(), a.w()));
445 }
446 template <>
447 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
448  const cl::sycl::cl_double2& a) {
449  return cl::sycl::fmin(a.x(), a.y());
450 }
451 
452 template <>
453 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
454  const cl::sycl::cl_float4& a) {
455  return a.x() * a.y() * a.z() * a.w();
456 }
457 template <>
458 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
459  const cl::sycl::cl_double2& a) {
460  return a.x() * a.y();
461 }
462 
463 template <>
464 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
465 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
466  return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
467  cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
468 }
469 template <>
470 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
471 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
472  return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
473 }
474 
475 template <typename Packet>
477  const Packet &b) {
478  return ((a <= b)
479  .template convert<typename unpacket_traits<Packet>::type,
480  cl::sycl::rounding_mode::automatic>());
481 }
482 
483 template <typename Packet>
485  const Packet &b) {
486  return ((a < b)
487  .template convert<typename unpacket_traits<Packet>::type,
488  cl::sycl::rounding_mode::automatic>());
489 }
490 
491 template <typename Packet>
493  const Packet &b) {
494  return ((a == b)
495  .template convert<typename unpacket_traits<Packet>::type,
496  cl::sycl::rounding_mode::automatic>());
497 }
498 
499 #define SYCL_PCMP(OP, TYPE) \
500  template <> \
501  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
502  const TYPE &b) { \
503  return sycl_pcmp_##OP<TYPE>(a, b); \
504  }
505 
506 SYCL_PCMP(le, cl::sycl::cl_float4)
507 SYCL_PCMP(lt, cl::sycl::cl_float4)
508 SYCL_PCMP(eq, cl::sycl::cl_float4)
509 SYCL_PCMP(le, cl::sycl::cl_double2)
510 SYCL_PCMP(lt, cl::sycl::cl_double2)
511 SYCL_PCMP(eq, cl::sycl::cl_double2)
512 #undef SYCL_PCMP
513 
514 template <typename T> struct convert_to_integer;
515 
516 template <> struct convert_to_integer<float> {
517  using type = std::int32_t;
518  using packet_type = cl::sycl::cl_int4;
519 };
520 template <> struct convert_to_integer<double> {
521  using type = std::int64_t;
522  using packet_type = cl::sycl::cl_long2;
523 };
524 
525 template <typename PacketIn>
526 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
527  typename unpacket_traits<PacketIn>::type>::packet_type
528 vector_as_int(const PacketIn &p) {
529  return (
530  p.template convert<typename convert_to_integer<
532  cl::sycl::rounding_mode::automatic>());
533 }
534 
535 template <typename packetOut, typename PacketIn>
537 convert_vector(const PacketIn &p) {
538  return (p.template convert<typename unpacket_traits<packetOut>::type,
539  cl::sycl::rounding_mode::automatic>());
540 }
541 
542 #define SYCL_PAND(TYPE) \
543  template <> \
544  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \
545  const TYPE &b) { \
546  return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \
547  }
548 SYCL_PAND(cl::sycl::cl_float4)
549 SYCL_PAND(cl::sycl::cl_double2)
550 #undef SYCL_PAND
551 
552 #define SYCL_POR(TYPE) \
553  template <> \
554  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \
555  const TYPE &b) { \
556  return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \
557  }
558 
559 SYCL_POR(cl::sycl::cl_float4)
560 SYCL_POR(cl::sycl::cl_double2)
561 #undef SYCL_POR
562 
563 #define SYCL_PXOR(TYPE) \
564  template <> \
565  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \
566  const TYPE &b) { \
567  return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \
568  }
569 
570 SYCL_PXOR(cl::sycl::cl_float4)
571 SYCL_PXOR(cl::sycl::cl_double2)
572 #undef SYCL_PXOR
573 
574 #define SYCL_PANDNOT(TYPE) \
575  template <> \
576  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \
577  const TYPE &b) { \
578  return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \
579  }
580 SYCL_PANDNOT(cl::sycl::cl_float4)
581 SYCL_PANDNOT(cl::sycl::cl_double2)
582 #undef SYCL_PANDNOT
583 
585  PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
586  float tmp = kernel.packet[0].y();
587  kernel.packet[0].y() = kernel.packet[1].x();
588  kernel.packet[1].x() = tmp;
589 
590  tmp = kernel.packet[0].z();
591  kernel.packet[0].z() = kernel.packet[2].x();
592  kernel.packet[2].x() = tmp;
593 
594  tmp = kernel.packet[0].w();
595  kernel.packet[0].w() = kernel.packet[3].x();
596  kernel.packet[3].x() = tmp;
597 
598  tmp = kernel.packet[1].z();
599  kernel.packet[1].z() = kernel.packet[2].y();
600  kernel.packet[2].y() = tmp;
601 
602  tmp = kernel.packet[1].w();
603  kernel.packet[1].w() = kernel.packet[3].y();
604  kernel.packet[3].y() = tmp;
605 
606  tmp = kernel.packet[2].w();
607  kernel.packet[2].w() = kernel.packet[3].z();
608  kernel.packet[3].z() = tmp;
609 }
610 
612  PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
613  double tmp = kernel.packet[0].y();
614  kernel.packet[0].y() = kernel.packet[1].x();
615  kernel.packet[1].x() = tmp;
616 }
617 
618 template <>
619 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
620  const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
621  const cl::sycl::cl_float4& thenPacket,
622  const cl::sycl::cl_float4& elsePacket) {
623  cl::sycl::cl_int4 condition(
624  ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
625  ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
626  return cl::sycl::select(thenPacket, elsePacket, condition);
627 }
628 
629 template <>
630 inline cl::sycl::cl_double2 pblend(
631  const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
632  const cl::sycl::cl_double2& thenPacket,
633  const cl::sycl::cl_double2& elsePacket) {
634  cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
635  ifPacket.select[1] ? 0 : -1);
636  return cl::sycl::select(thenPacket, elsePacket, condition);
637 }
638 #endif // SYCL_DEVICE_ONLY
639 
640 #define SYCL_PSTORE(alignment) \
641  template <typename packet_type> \
642  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
643  const Eigen::TensorSycl::internal::RangeAccess< \
644  cl::sycl::access::mode::read_write, \
645  typename unpacket_traits<packet_type>::type>& to, \
646  const packet_type& from) { \
647  pstore##alignment(to.get_pointer(), from); \
648  }
649 
650 // global space
651 SYCL_PSTORE()
653 
654 #undef SYCL_PSTORE
655 
656 template <typename scalar, typename packet_type, int Alignment>
658  Eigen::TensorSycl::internal::RangeAccess<
659  cl::sycl::access::mode::read_write,
661  to,
662  const packet_type& from) {
663  pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
664 }
665 
666 } // end namespace internal
667 
668 } // end namespace Eigen
669 
670 #endif // EIGEN_PACKET_MATH_SYCL_H
EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:976
Eigen
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
Eigen::Unaligned
@ Unaligned
Definition: Constants.h:233
b
Scalar * b
Definition: benchVecAdd.cpp:17
SYCL_PLOAD
#define SYCL_PLOAD(Alignment, AlignedType)
Definition: SYCL/PacketMath.h:91
type
Definition: pytypes.h:1525
boost::multiprecision::fabs
Real fabs(const Real &a)
Definition: boostmultiprec.cpp:119
Eigen::internal::pstoret
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, typename unpacket_traits< packet_type >::type > to, const packet_type &from)
Definition: SYCL/PacketMath.h:669
Eigen::internal::unpacket_traits::size
@ size
Definition: GenericPacketMath.h:138
SYCL_PSTORE
#define SYCL_PSTORE(alignment)
Definition: SYCL/PacketMath.h:652
int64_t
signed __int64 int64_t
Definition: ms_stdint.h:94
Eigen::internal::ptranspose
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:224
EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
Eigen::internal::pblend
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:2107
cl
Definition: cxx11_tensor_builtins_sycl.cpp:30
EIGEN_ALWAYS_INLINE
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:932
Eigen::Triplet
A small structure to hold a non zero as a triplet (i,j,value).
Definition: SparseUtil.h:162
Eigen::bfloat16_impl::fmin
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:582
Eigen::internal::ploadt_ro
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, T > &from)
Definition: SYCL/PacketMath.h:65
a
ArrayXXi a
Definition: Array_initializer_list_23_cxx11.cpp:1
Eigen::internal::unpacket_traits::type
T type
Definition: GenericPacketMath.h:134
Eigen::bfloat16_impl::fmax
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:587
p
float * p
Definition: Tutorial_Map_using.cpp:9
gtsam.examples.DogLegOptimizerExample.float
float
Definition: DogLegOptimizerExample.py:113
int32_t
signed int int32_t
Definition: ms_stdint.h:82
internal
Definition: BandTriangularSolver.h:13
gtsam::convert
static BinaryMeasurement< Rot3 > convert(const BetweenFactor< Pose3 >::shared_ptr &f)
Definition: ShonanAveraging.cpp:994
Eigen::internal::ploadt
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(const Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, typename unpacket_traits< packet_type >::type > &from)
Definition: SYCL/PacketMath.h:129
Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
Eigen::Aligned
@ Aligned
Definition: Constants.h:240


gtsam
Author(s):
autogenerated on Thu Dec 19 2024 04:02:04