MSA/PacketMath.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2018 Wave Computing, Inc.
5 // Written by:
6 // Chris Larsen
7 // Alexey Frunze (afrunze@wavecomp.com)
8 //
9 // This Source Code Form is subject to the terms of the Mozilla
10 // Public License v. 2.0. If a copy of the MPL was not distributed
11 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12 
13 #ifndef EIGEN_PACKET_MATH_MSA_H
14 #define EIGEN_PACKET_MATH_MSA_H
15 
16 #include <iostream>
17 #include <string>
18 
19 namespace Eigen {
20 
21 namespace internal {
22 
23 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
24 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
25 #endif
26 
27 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
28 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
29 #endif
30 
31 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
32 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
33 #endif
34 
35 #if 0
36 #define EIGEN_MSA_DEBUG \
37  static bool firstTime = true; \
38  do { \
39  if (firstTime) { \
40  std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
41  firstTime = false; \
42  } \
43  } while (0)
44 #else
45 #define EIGEN_MSA_DEBUG
46 #endif
47 
// Packs four 2-bit selectors into a single 8-bit immediate: `a` occupies
// bits 0-1, `b` bits 2-3, `c` bits 4-5 and `d` bits 6-7. The result is passed
// as the immediate operand of the __builtin_msa_shf_w calls in this file.
#define EIGEN_MSA_SHF_I8(a, b, c, d) (((d) << 6) | ((c) << 4) | ((b) << 2) | (a))
49 
50 typedef v4f32 Packet4f;
51 typedef v4i32 Packet4i;
52 typedef v4u32 Packet4ui;
53 
54 #define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }
55 #define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }
56 #define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }
57 
58 inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
59  os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
60  return os;
61 }
62 
63 inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
64  os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
65  return os;
66 }
67 
68 inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
69  os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
70  return os;
71 }
72 
73 template <>
74 struct packet_traits<float> : default_packet_traits {
75  typedef Packet4f type;
76  typedef Packet4f half; // Packet2f intrinsics not implemented yet
77  enum {
78  Vectorizable = 1,
79  AlignedOnScalar = 1,
80  size = 4,
81  HasHalfPacket = 0, // Packet2f intrinsics not implemented yet
82  // FIXME check the Has*
83  HasDiv = 1,
88  HasLog = 1,
89  HasExp = 1,
90  HasSqrt = 1,
91  HasRsqrt = 1,
92  HasRound = 1,
93  HasFloor = 1,
94  HasCeil = 1,
95  HasBlend = 1
96  };
97 };
98 
99 template <>
101  typedef Packet4i type;
102  typedef Packet4i half; // Packet2i intrinsics not implemented yet
103  enum {
106  size = 4,
107  HasHalfPacket = 0, // Packet2i intrinsics not implemented yet
108  // FIXME check the Has*
109  HasDiv = 1,
111  };
112 };
113 
114 template <>
115 struct unpacket_traits<Packet4f> {
116  typedef float type;
118  typedef Packet4f half;
119 };
120 
121 template <>
122 struct unpacket_traits<Packet4i> {
123  typedef int32_t type;
125  typedef Packet4i half;
126 };
127 
128 template <>
129 EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
131 
132  Packet4f v = { from, from, from, from };
133  return v;
134 }
135 
136 template <>
139 
140  return __builtin_msa_fill_w(from);
141 }
142 
143 template <>
146 
147  float f = *from;
148  Packet4f v = { f, f, f, f };
149  return v;
150 }
151 
152 template <>
155 
156  return __builtin_msa_fill_w(*from);
157 }
158 
159 template <>
162 
163  return __builtin_msa_fadd_w(a, b);
164 }
165 
166 template <>
169 
170  return __builtin_msa_addv_w(a, b);
171 }
172 
173 template <>
176 
177  static const Packet4f countdown = { 0.0f, 1.0f, 2.0f, 3.0f };
178  return padd(pset1<Packet4f>(a), countdown);
179 }
180 
181 template <>
184 
185  static const Packet4i countdown = { 0, 1, 2, 3 };
186  return padd(pset1<Packet4i>(a), countdown);
187 }
188 
189 template <>
192 
193  return __builtin_msa_fsub_w(a, b);
194 }
195 
196 template <>
199 
200  return __builtin_msa_subv_w(a, b);
201 }
202 
203 template <>
206 
207  return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
208 }
209 
210 template <>
213 
214  return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
215 }
216 
217 template <>
220 
221  return a;
222 }
223 
224 template <>
227 
228  return a;
229 }
230 
231 template <>
234 
235  return __builtin_msa_fmul_w(a, b);
236 }
237 
238 template <>
241 
242  return __builtin_msa_mulv_w(a, b);
243 }
244 
245 template <>
248 
249  return __builtin_msa_fdiv_w(a, b);
250 }
251 
252 template <>
255 
256  return __builtin_msa_div_s_w(a, b);
257 }
258 
259 template <>
260 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
262 
263  return __builtin_msa_fmadd_w(c, a, b);
264 }
265 
266 template <>
267 EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
269 
270  // Use "asm" construct to avoid __builtin_msa_maddv_w GNU C bug.
271  Packet4i value = c;
272  __asm__("maddv.w %w[value], %w[a], %w[b]\n"
273  // Outputs
274  : [value] "+f"(value)
275  // Inputs
276  : [a] "f"(a), [b] "f"(b));
277  return value;
278 }
279 
280 template <>
283 
284  return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
285 }
286 
287 template <>
290 
291  return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
292 }
293 
294 template <>
297 
298  return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
299 }
300 
301 template <>
304 
305  return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
306 }
307 
308 template <>
311 
312  return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
313 }
314 
315 template <>
318 
319  return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
320 }
321 
322 template <>
325 
326  return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
327 }
328 
329 template <>
332 
333  return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
334 }
335 
336 template <>
339 
340 #if EIGEN_FAST_MATH
341  // This prefers numbers to NaNs.
342  return __builtin_msa_fmin_w(a, b);
343 #else
344  // This prefers NaNs to numbers.
345  Packet4i aNaN = __builtin_msa_fcun_w(a, a);
346  Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
347  return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
348 #endif
349 }
350 
351 template <>
354 
355  return __builtin_msa_min_s_w(a, b);
356 }
357 
358 template <>
361 
362 #if EIGEN_FAST_MATH
363  // This prefers numbers to NaNs.
364  return __builtin_msa_fmax_w(a, b);
365 #else
366  // This prefers NaNs to numbers.
367  Packet4i aNaN = __builtin_msa_fcun_w(a, a);
368  Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
369  return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
370 #endif
371 }
372 
373 template <>
376 
377  return __builtin_msa_max_s_w(a, b);
378 }
379 
380 template <>
381 EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
383 
384  EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
385 }
386 
387 template <>
390 
391  EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
392 }
393 
394 template <>
395 EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
397 
398  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
399 }
400 
401 template <>
404 
405  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
406 }
407 
408 template <>
411 
412  float f0 = from[0], f1 = from[1];
413  Packet4f v0 = { f0, f0, f0, f0 };
414  Packet4f v1 = { f1, f1, f1, f1 };
415  return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
416 }
417 
418 template <>
421 
422  int32_t i0 = from[0], i1 = from[1];
423  Packet4i v0 = { i0, i0, i0, i0 };
424  Packet4i v1 = { i1, i1, i1, i1 };
425  return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
426 }
427 
428 template <>
429 EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
431 
432  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
433 }
434 
435 template <>
438 
439  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
440 }
441 
442 template <>
443 EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
445 
446  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
447 }
448 
449 template <>
452 
453  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
454 }
455 
456 template <>
457 EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
459 
460  float f = *from;
461  Packet4f v = { f, f, f, f };
462  v[1] = from[stride];
463  v[2] = from[2 * stride];
464  v[3] = from[3 * stride];
465  return v;
466 }
467 
468 template <>
471 
472  int32_t i = *from;
473  Packet4i v = { i, i, i, i };
474  v[1] = from[stride];
475  v[2] = from[2 * stride];
476  v[3] = from[3 * stride];
477  return v;
478 }
479 
480 template <>
481 EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from,
482  Index stride) {
484 
485  *to = from[0];
486  to += stride;
487  *to = from[1];
488  to += stride;
489  *to = from[2];
490  to += stride;
491  *to = from[3];
492 }
493 
494 template <>
496  Index stride) {
498 
499  *to = from[0];
500  to += stride;
501  *to = from[1];
502  to += stride;
503  *to = from[2];
504  to += stride;
505  *to = from[3];
506 }
507 
508 template <>
509 EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
511 
512  __builtin_prefetch(addr);
513 }
514 
515 template <>
518 
519  __builtin_prefetch(addr);
520 }
521 
522 template <>
525 
526  return a[0];
527 }
528 
529 template <>
532 
533  return a[0];
534 }
535 
536 template <>
539 
540  return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
541 }
542 
543 template <>
546 
547  return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
548 }
549 
550 template <>
553 
554  return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
555 }
556 
557 template <>
560 
561  Packet4i zero = __builtin_msa_ldi_w(0);
562  return __builtin_msa_add_a_w(zero, a);
563 }
564 
565 template <>
568 
569  Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
570  s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
571  return s[0];
572 }
573 
574 
575 template <>
578 
579  Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
580  s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
581  return s[0];
582 }
583 
584 // Other reduction functions:
585 // mul
586 template <>
589 
590  Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
591  p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
592  return p[0];
593 }
594 
595 template <>
598 
599  Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
600  p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
601  return p[0];
602 }
603 
604 // min
605 template <>
608 
609  // Swap 64-bit halves of a.
610  Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
611 #if !EIGEN_FAST_MATH
612  // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
613  // masks of all zeroes/ones in low 64 bits.
614  v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
615  // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
616  unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
617 #endif
618  // Continue with min computation.
619  Packet4f v = __builtin_msa_fmin_w(a, swapped);
620  v = __builtin_msa_fmin_w(
621  v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
622 #if !EIGEN_FAST_MATH
623  // Based on the mask select between v and 4 qNaNs.
624  v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
625  v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
626 #endif
627  return v[0];
628 }
629 
630 template <>
633 
634  Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
635  m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
636  return m[0];
637 }
638 
639 // max
640 template <>
643 
644  // Swap 64-bit halves of a.
645  Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
646 #if !EIGEN_FAST_MATH
647  // Detect presence of NaNs from pairs a[0]-a[2] and a[1]-a[3] as two 32-bit
648  // masks of all zeroes/ones in low 64 bits.
649  v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
650  // Combine the two masks into one: 64 ones if no NaNs, otherwise 64 zeroes.
651  unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
652 #endif
653  // Continue with max computation.
654  Packet4f v = __builtin_msa_fmax_w(a, swapped);
655  v = __builtin_msa_fmax_w(
656  v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
657 #if !EIGEN_FAST_MATH
658  // Based on the mask select between v and 4 qNaNs.
659  v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
660  v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
661 #endif
662  return v[0];
663 }
664 
665 template <>
668 
669  Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
670  m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
671  return m[0];
672 }
673 
674 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
675  os << "[ " << value.packet[0] << "," << std::endl
676  << " " << value.packet[1] << "," << std::endl
677  << " " << value.packet[2] << "," << std::endl
678  << " " << value.packet[3] << " ]";
679  return os;
680 }
681 
682 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
684 
685  v4i32 tmp1, tmp2, tmp3, tmp4;
686 
687  tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
688  tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
689  tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
690  tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
691 
692  kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
693  kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
694  kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
695  kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
696 }
697 
698 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
699  os << "[ " << value.packet[0] << "," << std::endl
700  << " " << value.packet[1] << "," << std::endl
701  << " " << value.packet[2] << "," << std::endl
702  << " " << value.packet[3] << " ]";
703  return os;
704 }
705 
706 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
708 
709  v4i32 tmp1, tmp2, tmp3, tmp4;
710 
711  tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
712  tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
713  tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
714  tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
715 
716  kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
717  kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
718  kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
719  kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
720 }
721 
722 template <>
725 
726  return __builtin_msa_fsqrt_w(a);
727 }
728 
729 template <>
732 
733 #if EIGEN_FAST_MATH
734  return __builtin_msa_frsqrt_w(a);
735 #else
736  Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
737  return pdiv(ones, psqrt(a));
738 #endif
739 }
740 
741 template <>
743  Packet4f v = a;
744  int32_t old_mode, new_mode;
745  asm volatile(
746  "cfcmsa %[old_mode], $1\n"
747  "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
748  "ctcmsa $1, %[new_mode]\n"
749  "frint.w %w[v], %w[v]\n"
750  "ctcmsa $1, %[old_mode]\n"
751  : // outputs
752  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
753  [v] "+f"(v)
754  : // inputs
755  : // clobbers
756  );
757  return v;
758 }
759 
760 template <>
762  Packet4f v = a;
763  int32_t old_mode, new_mode;
764  asm volatile(
765  "cfcmsa %[old_mode], $1\n"
766  "ori %[new_mode], %[old_mode], 3\n"
767  "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
768  "ctcmsa $1, %[new_mode]\n"
769  "frint.w %w[v], %w[v]\n"
770  "ctcmsa $1, %[old_mode]\n"
771  : // outputs
772  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
773  [v] "+f"(v)
774  : // inputs
775  : // clobbers
776  );
777  return v;
778 }
779 
780 template <>
782  Packet4f v = a;
783  int32_t old_mode, new_mode;
784  asm volatile(
785  "cfcmsa %[old_mode], $1\n"
786  "ori %[new_mode], %[old_mode], 3\n"
787  "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
788  "ctcmsa $1, %[new_mode]\n"
789  "frint.w %w[v], %w[v]\n"
790  "ctcmsa $1, %[old_mode]\n"
791  : // outputs
792  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
793  [v] "+f"(v)
794  : // inputs
795  : // clobbers
796  );
797  return v;
798 }
799 
800 template <>
801 EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
802  const Packet4f& elsePacket) {
803  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
804  ifPacket.select[3] };
805  Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
806  return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
807 }
808 
809 template <>
810 EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
811  const Packet4i& elsePacket) {
812  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
813  ifPacket.select[3] };
814  Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
815  return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
816 }
817 
818 //---------- double ----------
819 
820 typedef v2f64 Packet2d;
821 typedef v2i64 Packet2l;
822 typedef v2u64 Packet2ul;
823 
824 #define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }
825 #define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }
826 #define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }
827 
828 inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
829  os << "[ " << value[0] << ", " << value[1] << " ]";
830  return os;
831 }
832 
833 inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
834  os << "[ " << value[0] << ", " << value[1] << " ]";
835  return os;
836 }
837 
838 inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
839  os << "[ " << value[0] << ", " << value[1] << " ]";
840  return os;
841 }
842 
843 template <>
844 struct packet_traits<double> : default_packet_traits {
845  typedef Packet2d type;
846  typedef Packet2d half;
847  enum {
848  Vectorizable = 1,
849  AlignedOnScalar = 1,
850  size = 2,
851  HasHalfPacket = 0,
852  // FIXME check the Has*
853  HasDiv = 1,
854  HasExp = 1,
855  HasSqrt = 1,
856  HasRsqrt = 1,
857  HasRound = 1,
858  HasFloor = 1,
859  HasCeil = 1,
860  HasBlend = 1
861  };
862 };
863 
864 template <>
866  typedef double type;
868  typedef Packet2d half;
869 };
870 
871 template <>
874 
875  Packet2d value = { from, from };
876  return value;
877 }
878 
879 template <>
882 
883  return __builtin_msa_fadd_d(a, b);
884 }
885 
886 template <>
889 
890  static const Packet2d countdown = { 0.0, 1.0 };
891  return padd(pset1<Packet2d>(a), countdown);
892 }
893 
894 template <>
897 
898  return __builtin_msa_fsub_d(a, b);
899 }
900 
901 template <>
904 
905  return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
906 }
907 
908 template <>
911 
912  return a;
913 }
914 
915 template <>
918 
919  return __builtin_msa_fmul_d(a, b);
920 }
921 
922 template <>
925 
926  return __builtin_msa_fdiv_d(a, b);
927 }
928 
929 template <>
932 
933  return __builtin_msa_fmadd_d(c, a, b);
934 }
935 
936 // Logical Operations are not supported for float, so we have to reinterpret casts using MSA
937 // intrinsics
938 template <>
941 
942  return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
943 }
944 
945 template <>
948 
949  return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
950 }
951 
952 template <>
955 
956  return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
957 }
958 
959 template <>
962 
963  return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
964 }
965 
966 template <>
969 
970  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
971 }
972 
973 template <>
976 
977 #if EIGEN_FAST_MATH
978  // This prefers numbers to NaNs.
979  return __builtin_msa_fmin_d(a, b);
980 #else
981  // This prefers NaNs to numbers.
982  v2i64 aNaN = __builtin_msa_fcun_d(a, a);
983  v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
984  return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
985 #endif
986 }
987 
988 template <>
991 
992 #if EIGEN_FAST_MATH
993  // This prefers numbers to NaNs.
994  return __builtin_msa_fmax_d(a, b);
995 #else
996  // This prefers NaNs to numbers.
997  v2i64 aNaN = __builtin_msa_fcun_d(a, a);
998  v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
999  return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1000 #endif
1001 }
1002 
1003 template <>
1006 
1007  EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1008 }
1009 
1010 template <>
1013 
1014  Packet2d value = { *from, *from };
1015  return value;
1016 }
1017 
1018 template <>
1019 EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1021 
1022  EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1023 }
1024 
1025 template <>
1026 EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1028 
1029  EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1030 }
1031 
1032 template <>
1033 EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1035 
1036  Packet2d value;
1037  value[0] = *from;
1038  from += stride;
1039  value[1] = *from;
1040  return value;
1041 }
1042 
1043 template <>
1044 EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from,
1045  Index stride) {
1047 
1048  *to = from[0];
1049  to += stride;
1050  *to = from[1];
1051 }
1052 
1053 template <>
1054 EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1056 
1057  __builtin_prefetch(addr);
1058 }
1059 
1060 template <>
1063 
1064  return a[0];
1065 }
1066 
1067 template <>
1070 
1071  return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1072 }
1073 
1074 template <>
1077 
1078  return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1079 }
1080 
1081 template <>
1084 
1085  Packet2d s = padd(a, preverse(a));
1086  return s[0];
1087 }
1088 
1089 // Other reduction functions:
1090 // mul
1091 template <>
1094 
1095  Packet2d p = pmul(a, preverse(a));
1096  return p[0];
1097 }
1098 
1099 // min
1100 template <>
1103 
1104 #if EIGEN_FAST_MATH
1105  Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1106  Packet2d v = __builtin_msa_fmin_d(a, swapped);
1107  return v[0];
1108 #else
1109  double a0 = a[0], a1 = a[1];
1110  return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1111 #endif
1112 }
1113 
1114 // max
1115 template <>
1118 
1119 #if EIGEN_FAST_MATH
1120  Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1121  Packet2d v = __builtin_msa_fmax_d(a, swapped);
1122  return v[0];
1123 #else
1124  double a0 = a[0], a1 = a[1];
1125  return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1126 #endif
1127 }
1128 
1129 template <>
1132 
1133  return __builtin_msa_fsqrt_d(a);
1134 }
1135 
1136 template <>
1139 
1140 #if EIGEN_FAST_MATH
1141  return __builtin_msa_frsqrt_d(a);
1142 #else
1143  Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1144  return pdiv(ones, psqrt(a));
1145 #endif
1146 }
1147 
1148 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1149  os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1150  return os;
1151 }
1152 
1155 
1156  Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1157  Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1158  kernel.packet[0] = trn1;
1159  kernel.packet[1] = trn2;
1160 }
1161 
1162 template <>
1164  Packet2d v = a;
1165  int32_t old_mode, new_mode;
1166  asm volatile(
1167  "cfcmsa %[old_mode], $1\n"
1168  "ori %[new_mode], %[old_mode], 3\n" // 3 = round towards -INFINITY.
1169  "ctcmsa $1, %[new_mode]\n"
1170  "frint.d %w[v], %w[v]\n"
1171  "ctcmsa $1, %[old_mode]\n"
1172  : // outputs
1173  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1174  [v] "+f"(v)
1175  : // inputs
1176  : // clobbers
1177  );
1178  return v;
1179 }
1180 
1181 template <>
1183  Packet2d v = a;
1184  int32_t old_mode, new_mode;
1185  asm volatile(
1186  "cfcmsa %[old_mode], $1\n"
1187  "ori %[new_mode], %[old_mode], 3\n"
1188  "xori %[new_mode], %[new_mode], 1\n" // 2 = round towards +INFINITY.
1189  "ctcmsa $1, %[new_mode]\n"
1190  "frint.d %w[v], %w[v]\n"
1191  "ctcmsa $1, %[old_mode]\n"
1192  : // outputs
1193  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1194  [v] "+f"(v)
1195  : // inputs
1196  : // clobbers
1197  );
1198  return v;
1199 }
1200 
1201 template <>
1203  Packet2d v = a;
1204  int32_t old_mode, new_mode;
1205  asm volatile(
1206  "cfcmsa %[old_mode], $1\n"
1207  "ori %[new_mode], %[old_mode], 3\n"
1208  "xori %[new_mode], %[new_mode], 3\n" // 0 = round to nearest, ties to even.
1209  "ctcmsa $1, %[new_mode]\n"
1210  "frint.d %w[v], %w[v]\n"
1211  "ctcmsa $1, %[old_mode]\n"
1212  : // outputs
1213  [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1214  [v] "+f"(v)
1215  : // inputs
1216  : // clobbers
1217  );
1218  return v;
1219 }
1220 
1221 template <>
1222 EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1223  const Packet2d& elsePacket) {
1224  Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
1225  Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1226  return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1227 }
1228 
1229 } // end namespace internal
1230 
1231 } // end namespace Eigen
1232 
1233 #endif // EIGEN_PACKET_MATH_MSA_H
EIGEN_MSA_DEBUG
#define EIGEN_MSA_DEBUG
Definition: MSA/PacketMath.h:45
Eigen::internal::Packet4i
__vector int Packet4i
Definition: AltiVec/PacketMath.h:31
Eigen::internal::pround< Packet4f >
EIGEN_STRONG_INLINE Packet4f pround< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:921
Eigen::internal::pset1< Packet2d >
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: MSA/PacketMath.h:872
Eigen::internal::pdiv< Packet2d >
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:923
EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:976
Eigen
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
Eigen::internal::psqrt
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: MSA/PacketMath.h:723
Eigen::internal::pmul< Packet2d >
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:916
Eigen::internal::pxor< Packet4i >
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:909
Eigen::internal::pload< Packet4i >
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:448
Eigen::internal::default_packet_traits::HasLog
@ HasLog
Definition: GenericPacketMath.h:70
Eigen::internal::pfirst< Packet4i >
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1121
Eigen::internal::packet_traits::size
@ size
Definition: GenericPacketMath.h:112
Eigen::internal::Packet4f
__vector float Packet4f
Definition: AltiVec/PacketMath.h:30
s
RealScalar s
Definition: level1_cplx_impl.h:126
Eigen::internal::pand< Packet4i >
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:892
v0
static const double v0
Definition: testCal3DFisheye.cpp:31
Eigen::internal::pfloor< Packet4f >
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:939
Eigen::internal::prefetch< int32_t >
EIGEN_STRONG_INLINE void prefetch< int32_t >(const int32_t *addr)
Definition: MSA/PacketMath.h:516
Eigen::internal::pscatter< int32_t, Packet4i >
EIGEN_DEVICE_FUNC void pscatter< int32_t, Packet4i >(int32_t *to, const Packet4i &from, Index stride)
Definition: MSA/PacketMath.h:495
Eigen::internal::predux_max< Packet2d >
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1116
Eigen::internal::PacketBlock
Definition: GenericPacketMath.h:1014
Eigen::internal::pgather< float, Packet4f >
EIGEN_DEVICE_FUNC Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: AltiVec/PacketMath.h:613
c
Scalar Scalar * c
Definition: benchVecAdd.cpp:17
b
Scalar * b
Definition: benchVecAdd.cpp:17
Eigen::internal::ploaddup< Packet2d >
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: MSA/PacketMath.h:1011
Eigen::internal::psub< Packet4f >
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:782
Eigen::internal::plset< Packet4i >
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: AltiVec/PacketMath.h:768
Eigen::internal::Selector
Definition: GenericPacketMath.h:1027
Eigen::internal::packet_traits
Definition: GenericPacketMath.h:106
Eigen::internal::ploadu< Packet4i >
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:972
Eigen::internal::preverse
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: AltiVec/Complex.h:184
Eigen::internal::PacketBlock::packet
Packet packet[N]
Definition: GenericPacketMath.h:1018
Eigen::internal::unpacket_traits< Packet4i >::half
Packet4i half
Definition: MSA/PacketMath.h:125
Eigen::internal::pandnot< Packet4i >
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:915
Eigen::internal::Selector::select
bool select[N]
Definition: GenericPacketMath.h:1031
Eigen::internal::pdiv
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:244
Eigen::internal::ploadu< Packet2d >
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: MSA/PacketMath.h:1004
Eigen::internal::pconj
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: AltiVec/Complex.h:167
Eigen::internal::predux_max< Packet4f >
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1693
Eigen::internal::packet_traits< float >::half
Packet4f half
Definition: MSA/PacketMath.h:76
Eigen::internal::unpacket_traits< Packet4i >::type
int32_t type
Definition: MSA/PacketMath.h:123
Eigen::internal::prefetch< float >
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: AltiVec/PacketMath.h:1117
Eigen::internal::ploadu< Packet4f >
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:968
os
ofstream os("timeSchurFactors.csv")
Eigen::internal::unpacket_traits::vectorizable
@ vectorizable
Definition: GenericPacketMath.h:140
Eigen::internal::pgather< double, Packet2d >
EIGEN_DEVICE_FUNC Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: MSA/PacketMath.h:1033
Eigen::internal::default_packet_traits::HasBlend
@ HasBlend
Definition: GenericPacketMath.h:60
Eigen::internal::default_packet_traits::HasCos
@ HasCos
Definition: GenericPacketMath.h:76
Eigen::internal::default_packet_traits
Definition: GenericPacketMath.h:42
Eigen::internal::pload< Packet4f >
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:443
Eigen::internal::por< Packet2d >
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:946
Eigen::internal::packet_traits::HasHalfPacket
@ HasHalfPacket
Definition: GenericPacketMath.h:114
Eigen::internal::padd< Packet4f >
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:774
EIGEN_DEBUG_UNALIGNED_STORE
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:39
Eigen::internal::pload1< Packet4i >
EIGEN_STRONG_INLINE Packet4i pload1< Packet4i >(const int32_t *from)
Definition: MSA/PacketMath.h:153
Eigen::internal::unpacket_traits::masked_store_available
@ masked_store_available
Definition: GenericPacketMath.h:142
Eigen::internal::pand< Packet2d >
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:939
Eigen::internal::pandnot< Packet4f >
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:914
Eigen::internal::psub< Packet4i >
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:783
Eigen::internal::padd< Packet2d >
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:880
Eigen::internal::packet_traits< int32_t >::half
Packet4i half
Definition: MSA/PacketMath.h:102
align_3::a1
Point2 a1
Definition: testPose2.cpp:769
Eigen::internal::pmul< Packet4i >
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:796
Eigen::internal::unpacket_traits::size
@ size
Definition: GenericPacketMath.h:138
Eigen::internal::unpacket_traits::alignment
@ alignment
Definition: GenericPacketMath.h:139
Eigen::internal::pmin< Packet4f >
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:832
Eigen::internal::pstore< double >
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:623
Eigen::internal::pceil< Packet4f >
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:938
Eigen::internal::Packet4ui
__vector unsigned int Packet4ui
Definition: AltiVec/PacketMath.h:32
Eigen::internal::pload1< Packet4f >
EIGEN_STRONG_INLINE Packet4f pload1< Packet4f >(const float *from)
Definition: MSA/PacketMath.h:144
Eigen::internal::default_packet_traits::HasSin
@ HasSin
Definition: GenericPacketMath.h:75
Eigen::internal::pmul< Packet4f >
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:795
Eigen::internal::packet_traits::AlignedOnScalar
@ AlignedOnScalar
Definition: GenericPacketMath.h:113
Eigen::internal::pxor< Packet2d >
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:953
Eigen::internal::pnegate
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: AltiVec/Complex.h:166
Eigen::internal::plset< Packet2d >
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: MSA/PacketMath.h:887
Eigen::internal::unpacket_traits
Definition: GenericPacketMath.h:132
Eigen::internal::pset1< Packet4f >
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: AltiVec/PacketMath.h:547
Eigen::internal::ptranspose
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: AltiVec/Complex.h:224
Eigen::internal::por< Packet4i >
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:901
Eigen::internal::predux_min< Packet4i >
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1618
zero
EIGEN_DONT_INLINE Scalar zero()
Definition: svd_common.h:296
EIGEN_DEBUG_ALIGNED_LOAD
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:27
Eigen::internal::packet_traits< int32_t >::type
Packet4i type
Definition: MSA/PacketMath.h:101
EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
Eigen::internal::pdiv< Packet4i >
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &, const Packet4i &)
Definition: AltiVec/PacketMath.h:821
Eigen::internal::predux_mul< Packet4f >
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1533
Eigen::internal::pmax
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:524
Eigen::numext::isnan
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool() isnan(const Eigen::bfloat16 &h)
Definition: BFloat16.h:659
Eigen::internal::default_packet_traits::HasErf
@ HasErf
Definition: GenericPacketMath.h:88
Eigen::internal::pblend
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: AltiVec/PacketMath.h:2107
Eigen::internal::pscatter< double, Packet2d >
EIGEN_DEVICE_FUNC void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: MSA/PacketMath.h:1044
Eigen::internal::default_packet_traits::HasSqrt
@ HasSqrt
Definition: GenericPacketMath.h:66
Eigen::internal::Packet2l
v2i64 Packet2l
Definition: MSA/PacketMath.h:821
Eigen::internal::pscatter< float, Packet4f >
EIGEN_DEVICE_FUNC void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: AltiVec/PacketMath.h:695
Eigen::internal::pand
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:1050
Eigen::internal::packet_traits< float >::type
Packet4f type
Definition: MSA/PacketMath.h:75
Eigen::internal::unpacket_traits< Packet4f >::half
Packet4f half
Definition: MSA/PacketMath.h:118
m
Matrix3f m
Definition: AngleAxis_mimic_euler.cpp:1
Eigen::internal::default_packet_traits::HasCeil
@ HasCeil
Definition: GenericPacketMath.h:101
Eigen::internal::pandnot< Packet2d >
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:960
Eigen::internal::predux_min< Packet4f >
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1613
Eigen::internal::operator<<
std::ostream & operator<<(std::ostream &s, const Packet16c &v)
Definition: AltiVec/PacketMath.h:371
Eigen::internal::predux_mul< Packet2d >
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1092
Eigen::internal::default_packet_traits::HasFloor
@ HasFloor
Definition: GenericPacketMath.h:100
Eigen::internal::unpacket_traits< Packet2d >::half
Packet2d half
Definition: MSA/PacketMath.h:868
Eigen::internal::plset< Packet4f >
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: AltiVec/PacketMath.h:767
Eigen::internal::ploaddup< Packet4f >
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: AltiVec/PacketMath.h:1004
Eigen::internal::default_packet_traits::HasTanh
@ HasTanh
Definition: GenericPacketMath.h:83
Eigen::internal::unpacket_traits::masked_load_available
@ masked_load_available
Definition: GenericPacketMath.h:141
Eigen::internal::psub< Packet2d >
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:895
tree::f
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Definition: testExpression.cpp:218
i0
double i0(double x)
Definition: i0.c:149
Eigen::Aligned16
@ Aligned16
Definition: Constants.h:235
a
ArrayXXi a
Definition: Array_initializer_list_23_cxx11.cpp:1
Eigen::internal::predux< Packet4i >
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1454
Eigen::internal::default_packet_traits::HasExp
@ HasExp
Definition: GenericPacketMath.h:68
Eigen::internal::pmul
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
Eigen::internal::pceil< Packet2d >
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1182
Eigen::internal::pstore< int32_t >
EIGEN_STRONG_INLINE void pstore< int32_t >(int32_t *to, const Packet4i &from)
Definition: MSA/PacketMath.h:436
Eigen::internal::pstoreu< int32_t >
EIGEN_STRONG_INLINE void pstoreu< int32_t >(int32_t *to, const Packet4i &from)
Definition: MSA/PacketMath.h:450
Eigen::internal::Packet2d
v2f64 Packet2d
Definition: MSA/PacketMath.h:820
i1
double i1(double x)
Definition: i1.c:150
EIGEN_DEBUG_UNALIGNED_LOAD
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:31
Eigen::internal::pmax< Packet2d >
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:989
Eigen::internal::default_packet_traits::HasRsqrt
@ HasRsqrt
Definition: GenericPacketMath.h:67
Eigen::internal::pfirst< Packet2d >
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1061
Eigen::internal::por< Packet4f >
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:900
Eigen::internal::pmadd
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: AltiVec/PacketMath.h:827
Eigen::internal::pand< Packet4f >
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:891
p
float * p
Definition: Tutorial_Map_using.cpp:9
Eigen::internal::packet_traits< double >::half
Packet2d half
Definition: MSA/PacketMath.h:846
Eigen::internal::pmin< Packet2d >
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: MSA/PacketMath.h:974
Eigen::internal::pstoreu< double >
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: AVX/PacketMath.h:627
Eigen::internal::predux< Packet4f >
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1444
Eigen::internal::pabs
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1176
v
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1
Eigen::internal::padd
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:215
Eigen::internal::pmin
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:512
Eigen::internal::pgather< int32_t, Packet4i >
EIGEN_DEVICE_FUNC Packet4i pgather< int32_t, Packet4i >(const int32_t *from, Index stride)
Definition: MSA/PacketMath.h:469
gtsam.examples.DogLegOptimizerExample.float
float
Definition: DogLegOptimizerExample.py:113
Eigen::internal::pfirst< Packet4f >
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1120
int32_t
signed int int32_t
Definition: ms_stdint.h:82
Eigen::internal::por
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:1042
Eigen::internal::pfloor< Packet2d >
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1163
Eigen::internal::default_packet_traits::HasDiv
@ HasDiv
Definition: GenericPacketMath.h:65
Eigen::internal::Packet2ul
v2u64 Packet2ul
Definition: MSA/PacketMath.h:822
Eigen::internal::pxor< Packet4f >
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:908
unary::f1
Point2 f1(const Point3 &p, OptionalJacobian< 2, 3 > H)
Definition: testExpression.cpp:79
internal
Definition: BandTriangularSolver.h:13
Eigen::internal::pstoreu< float >
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:1088
EIGEN_MSA_SHF_I8
#define EIGEN_MSA_SHF_I8(a, b, c, d)
Definition: MSA/PacketMath.h:48
Eigen::internal::predux_mul< Packet4i >
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1540
Eigen::internal::prsqrt
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: MSA/PacketMath.h:730
Eigen::internal::predux< Packet2d >
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1082
Eigen::internal::predux_min< Packet2d >
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1101
Eigen::internal::pdiv< Packet4f >
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:803
Eigen::internal::packet_traits< double >::type
Packet2d type
Definition: MSA/PacketMath.h:845
Eigen::internal::pmax< Packet4i >
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:861
Eigen::internal::pmax< Packet4f >
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: AltiVec/PacketMath.h:850
Eigen::internal::pmin< Packet4i >
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:843
Eigen::internal::prefetch< double >
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: AVX/PacketMath.h:692
Eigen::internal::pset1< Packet4i >
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: AltiVec/PacketMath.h:551
Eigen::internal::packet_traits::Vectorizable
@ Vectorizable
Definition: GenericPacketMath.h:111
test_callbacks.value
value
Definition: test_callbacks.py:158
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Eigen::internal::predux_max< Packet4i >
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: AltiVec/PacketMath.h:1698
Eigen::internal::pload< Packet2d >
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: MSA/PacketMath.h:967
EIGEN_DEBUG_ALIGNED_STORE
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:35
Eigen::internal::pstore< float >
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: AltiVec/PacketMath.h:491
v1
Vector v1
Definition: testSerializationBase.cpp:38
ones
MatrixXcf ones
Definition: ComplexEigenSolver_eigenvalues.cpp:1
Eigen::internal::padd< Packet4i >
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: AltiVec/PacketMath.h:775
Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
Eigen::internal::ploaddup< Packet4i >
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: AltiVec/PacketMath.h:1008
EIGEN_FAST_MATH
#define EIGEN_FAST_MATH
Definition: Macros.h:49
Eigen::internal::unpacket_traits< Packet2d >::type
double type
Definition: MSA/PacketMath.h:866
Eigen::internal::unpacket_traits< Packet4f >::type
float type
Definition: MSA/PacketMath.h:116
Eigen::internal::default_packet_traits::HasRound
@ HasRound
Definition: GenericPacketMath.h:98
Eigen::internal::pround< Packet2d >
EIGEN_STRONG_INLINE Packet2d pround< Packet2d >(const Packet2d &a)
Definition: MSA/PacketMath.h:1202


gtsam
Author(s):
autogenerated on Thu Jun 13 2024 03:03:53