10 #ifndef EIGEN_PACKET_MATH_SSE_H 11 #define EIGEN_PACKET_MATH_SSE_H 17 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 18 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 21 #if !defined(EIGEN_VECTORIZE_AVX) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS) 24 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) 27 #ifdef EIGEN_VECTORIZE_FMA 28 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD 29 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 33 #if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX 39 typedef eigen_packet_wrapper<__m128>
Packet4f;
40 typedef eigen_packet_wrapper<__m128d>
Packet2d;
46 typedef eigen_packet_wrapper<__m128i, 0>
Packet4i;
55 template<
int p,
int q,
int r,
int s>
57 enum { mask = (
s)<<6|(r)<<4|(
q)<<2|(
p) };
61 #define vec4f_swizzle1(v,p,q,r,s) \ 62 Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), (shuffle_mask<p,q,r,s>::mask)))) 64 #define vec4i_swizzle1(v,p,q,r,s) \ 65 Packet4i(_mm_shuffle_epi32( v, (shuffle_mask<p,q,r,s>::mask))) 67 #define vec2d_swizzle1(v,p,q) \ 68 Packet2d(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), (shuffle_mask<2*p,2*p+1,2*q,2*q+1>::mask)))) 70 #define vec4f_swizzle2(a,b,p,q,r,s) \ 71 Packet4f(_mm_shuffle_ps( (a), (b), (shuffle_mask<p,q,r,s>::mask))) 73 #define vec4i_swizzle2(a,b,p,q,r,s) \ 74 Packet4i(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), (shuffle_mask<p,q,r,s>::mask))))) 86 return Packet4f(_mm_unpacklo_ps(a,b));
90 return Packet4f(_mm_unpackhi_ps(a,b));
92 #define vec4f_duplane(a,p) \ 93 vec4f_swizzle2(a,a,p,p,p,p) 95 #define vec2d_swizzle2(a,b,mask) \ 96 Packet2d(_mm_shuffle_pd(a,b,mask)) 100 return Packet2d(_mm_unpacklo_pd(a,b));
104 return Packet2d(_mm_unpackhi_pd(a,b));
106 #define vec2d_duplane(a,p) \ 107 vec2d_swizzle2(a,a,(p<<1)|p) 109 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ 110 const Packet4f p4f_##NAME = pset1<Packet4f>(X) 112 #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ 113 const Packet2d p2d_##NAME = pset1<Packet2d>(X) 115 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ 116 const Packet4f p4f_##NAME = pset1frombits<Packet4f>(X) 118 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ 119 const Packet4i p4i_##NAME = pset1<Packet4i>(X) 124 #ifndef EIGEN_VECTORIZE_AVX 126 struct packet_traits<
float> : default_packet_traits {
152 #ifdef EIGEN_VECTORIZE_SSE4_1 177 #ifdef EIGEN_VECTORIZE_SSE4_1 226 enum {
size=4, alignment=
Aligned16, vectorizable=
true, masked_load_available=
false, masked_store_available=
false};
231 enum {
size=2, alignment=
Aligned16, vectorizable=
true, masked_load_available=
false, masked_store_available=
false};
236 enum {
size=4, alignment=
Aligned16, vectorizable=
false, masked_load_available=
false, masked_store_available=
false};
241 enum {
size=16, alignment=
Aligned16, vectorizable=
true, masked_load_available=
false, masked_store_available=
false};
244 #ifndef EIGEN_VECTORIZE_AVX 249 #if EIGEN_COMP_MSVC==1500 279 #if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__) 303 #ifdef EIGEN_VECTORIZE_SSE3 304 return _mm_addsub_ps(a,b);
306 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0
x0,0x80000000,0
x0));
314 #ifdef EIGEN_VECTORIZE_SSE3 315 return _mm_addsub_pd(a,b);
317 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0
x0,0x80000000,0
x0,0
x0));
324 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
325 return _mm_xor_ps(a,mask);
329 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0
x0,0x80000000,0
x0,0x80000000));
330 return _mm_xor_pd(a,mask);
350 #ifdef EIGEN_VECTORIZE_SSE4_1 351 return _mm_mullo_epi32(a,b);
// Packet4f specialization of pmadd, only present when EIGEN_VECTORIZE_FMA is
// defined. It forwards its three operands, unchanged and in order, to the
// _mm_fmadd_ps intrinsic (documented by Intel as a fused a*b + c on four
// packed floats) and returns the intrinsic's result.
// NOTE(review): the #endif matching the #ifdef below is outside this excerpt.
371 #ifdef EIGEN_VECTORIZE_FMA 372 template<>
EIGEN_STRONG_INLINE Packet4f
pmadd(
const Packet4f& a,
const Packet4f& b,
const Packet4f& c) {
return _mm_fmadd_ps(a,b,c); }
373 template<>
EIGEN_STRONG_INLINE Packet2d
pmadd(
const Packet2d& a,
const Packet2d& b,
const Packet2d& c) {
return _mm_fmadd_pd(a,b,c); }
376 #ifdef EIGEN_VECTORIZE_SSE4_1 378 return _mm_blendv_ps(b,a,mask);
382 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b),_mm_castsi128_ps(a),_mm_castsi128_ps(mask)));
385 template<>
EIGEN_DEVICE_FUNC inline Packet2d
pselect(
const Packet2d& mask,
const Packet2d& a,
const Packet2d& b) {
return _mm_blendv_pd(b,a,mask); }
387 template<>
EIGEN_DEVICE_FUNC inline Packet16b
pselect(
const Packet16b& mask,
const Packet16b& a,
const Packet16b& b) {
388 return _mm_blendv_epi8(b,a,mask);
392 Packet16b a_part = _mm_and_si128(mask, a);
393 Packet16b b_part = _mm_andnot_si128(mask, b);
394 return _mm_or_si128(a_part, b_part);
402 Packet4i b = _mm_castps_si128(a);
403 return _mm_castsi128_ps(_mm_cmpeq_epi32(b, b));
407 Packet4i b = _mm_castpd_si128(a);
408 return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b));
447 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 452 #ifdef EIGEN_VECTORIZE_AVX 454 asm(
"vminps %[a], %[b], %[res]" : [
res]
"=x" (
res) : [a]
"x" (a), [
b]
"x" (
b));
457 asm(
"minps %[a], %[res]" : [
res]
"+x" (
res) : [a]
"x" (a));
462 return _mm_min_ps(b, a);
466 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 471 #ifdef EIGEN_VECTORIZE_AVX 473 asm(
"vminpd %[a], %[b], %[res]" : [
res]
"=x" (
res) : [a]
"x" (a), [
b]
"x" (
b));
476 asm(
"minpd %[a], %[res]" : [
res]
"+x" (
res) : [a]
"x" (a));
481 return _mm_min_pd(b, a);
486 #ifdef EIGEN_VECTORIZE_SSE4_1 487 return _mm_min_epi32(a,b);
490 Packet4i mask = _mm_cmplt_epi32(a,b);
491 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
497 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 502 #ifdef EIGEN_VECTORIZE_AVX 504 asm(
"vmaxps %[a], %[b], %[res]" : [
res]
"=x" (
res) : [a]
"x" (a), [
b]
"x" (
b));
507 asm(
"maxps %[a], %[res]" : [
res]
"+x" (
res) : [a]
"x" (a));
512 return _mm_max_ps(b, a);
516 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 521 #ifdef EIGEN_VECTORIZE_AVX 523 asm(
"vmaxpd %[a], %[b], %[res]" : [
res]
"=x" (
res) : [a]
"x" (a), [
b]
"x" (
b));
526 asm(
"maxpd %[a], %[res]" : [
res]
"+x" (
res) : [a]
"x" (a));
531 return _mm_max_pd(b, a);
536 #ifdef EIGEN_VECTORIZE_SSE4_1 537 return _mm_max_epi32(a,b);
540 Packet4i mask = _mm_cmpgt_epi32(a,b);
541 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
545 template <
typename Packet,
typename Op>
551 return pselect<Packet>(not_nan_mask_a,
m,
b);
554 template <
typename Packet,
typename Op>
560 return pselect<Packet>(not_nan_mask_a,
m,
a);
603 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
604 return _mm_and_ps(a,mask);
608 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
609 return _mm_and_pd(a,mask);
613 #ifdef EIGEN_VECTORIZE_SSSE3 614 return _mm_abs_epi32(a);
616 Packet4i aux = _mm_srai_epi32(a,31);
617 return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
621 #ifdef EIGEN_VECTORIZE_SSE4_1 627 return _mm_round_ps(
padd(
por(
pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);
632 const Packet2d mask = _mm_castsi128_pd(_mm_set_epi64x(0x8000000000000000ull, 0x8000000000000000ull));
633 const Packet2d prev0dot5 = _mm_castsi128_pd(_mm_set_epi64x(0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull));
634 return _mm_round_pd(
padd(
por(
pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);
638 template<>
EIGEN_STRONG_INLINE Packet2d print<Packet2d>(
const Packet2d&
a) {
return _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
649 const Packet4f abs_a =
pabs(a);
650 Packet4f r =
padd(abs_a, limit);
663 const Packet2d abs_a =
pabs(a);
664 Packet2d r =
padd(abs_a, limit);
679 Packet4f mask = _mm_cmpgt_ps(tmp, a);
680 mask =
pand(mask, cst_1);
681 return psub(tmp, mask);
687 Packet2d tmp = print<Packet2d>(
a);
689 Packet2d mask = _mm_cmpgt_pd(tmp, a);
690 mask =
pand(mask, cst_1);
691 return psub(tmp, mask);
699 Packet4f mask = _mm_cmplt_ps(tmp, a);
700 mask =
pand(mask, cst_1);
701 return padd(tmp, mask);
707 Packet2d tmp = print<Packet2d>(
a);
709 Packet2d mask = _mm_cmplt_pd(tmp, a);
710 mask =
pand(mask, cst_1);
711 return padd(tmp, mask);
723 #if (EIGEN_COMP_MSVC==1600) 726 __m128
res = _mm_loadl_pi(_mm_set1_ps(0.0
f), (
const __m64*)(from));
727 res = _mm_loadh_pi(res, (
const __m64*)(from+2));
730 return _mm_loadu_ps(from);
739 return _mm_loadu_ps(from);
746 return _mm_loadu_pd(from);
751 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
755 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
761 return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
768 tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
776 __m128i tmp = _mm_castpd_si128(pload1<Packet2d>(reinterpret_cast<const double*>(from)));
777 return _mm_unpacklo_epi8(tmp, tmp);
784 __m128i tmp = _mm_castps_si128(
pload1<Packet4f>(reinterpret_cast<const float*>(from)));
785 tmp = _mm_unpacklo_epi8(tmp, tmp);
786 return _mm_unpacklo_epi16(tmp, tmp);
801 return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
805 return _mm_set_pd(from[1*stride], from[0*stride]);
809 return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
814 return _mm_set_epi8(from[15*stride], from[14*stride], from[13*stride], from[12*stride],
815 from[11*stride], from[10*stride], from[9*stride], from[8*stride],
816 from[7*stride], from[6*stride], from[5*stride], from[4*stride],
817 from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
822 to[stride*0] = _mm_cvtss_f32(from);
823 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
824 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
825 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
829 to[stride*0] = _mm_cvtsd_f64(from);
830 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
834 to[stride*0] = _mm_cvtsi128_si32(from);
835 to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
836 to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
837 to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
841 to[4*stride*0] = _mm_cvtsi128_si32(from);
842 to[4*stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
843 to[4*stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
844 to[4*stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
851 Packet4f pa = _mm_set_ss(a);
857 Packet2d pa = _mm_set_sd(a);
861 #if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900 867 #ifndef EIGEN_VECTORIZE_AVX 873 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 879 #elif EIGEN_COMP_MSVC_STRICT 895 #ifdef EIGEN_VECTORIZE_SSSE3 896 __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
897 return _mm_shuffle_epi8(a, mask);
899 Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
900 tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
901 return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
914 __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(
pand(a, cst_exp_mask)), 52);
931 const Packet2d
e =
pmin(
pmax(exponent,
pnegate(max_exponent)), max_exponent);
934 const Packet4i ei =
vec4i_swizzle1(_mm_cvtpd_epi32(e), 0, 3, 1, 3);
937 const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);
938 Packet4i b = parithmetic_shift_right<2>(ei);
939 Packet2d c = _mm_castsi128_pd(_mm_slli_epi64(
padd(b, bias), 52));
942 c = _mm_castsi128_pd(_mm_slli_epi64(
padd(b, bias), 52));
951 Packet4f& a0, Packet4f&
a1, Packet4f&
a2, Packet4f&
a3)
961 Packet2d& a0, Packet2d&
a1, Packet2d&
a2, Packet2d&
a3)
963 #ifdef EIGEN_VECTORIZE_SSE3 964 a0 = _mm_loaddup_pd(a+0);
965 a1 = _mm_loaddup_pd(a+1);
966 a2 = _mm_loaddup_pd(a+2);
967 a3 = _mm_loaddup_pd(a+3);
981 vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
982 vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
983 vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
984 vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
995 Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
1011 #ifdef EIGEN_VECTORIZE_SSSE3 1014 Packet4i tmp0 = _mm_hadd_epi32(a,a);
1021 Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
1027 Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));
1037 Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
1051 return (aux[0] * aux[1]) * (aux[2] * aux[3]);
1055 Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a,a));
1063 Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
1072 #ifdef EIGEN_VECTORIZE_SSE4_1 1073 Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
1080 int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
1081 int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
1082 return aux0<aux2 ? aux0 : aux2;
1083 #endif // EIGEN_VECTORIZE_SSE4_1 1089 Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
1098 #ifdef EIGEN_VECTORIZE_SSE4_1 1099 Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
1106 int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
1107 int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
1108 return aux0>aux2 ? aux0 : aux2;
1109 #endif // EIGEN_VECTORIZE_SSE4_1 1120 return _mm_movemask_ps(x) != 0x0;
1130 __m128d tmp = _mm_unpackhi_pd(kernel.
packet[0], kernel.
packet[1]);
1137 __m128i T0 = _mm_unpacklo_epi32(kernel.
packet[0], kernel.
packet[1]);
1138 __m128i
T1 = _mm_unpacklo_epi32(kernel.
packet[2], kernel.
packet[3]);
1139 __m128i
T2 = _mm_unpackhi_epi32(kernel.
packet[0], kernel.
packet[1]);
1140 __m128i
T3 = _mm_unpackhi_epi32(kernel.
packet[2], kernel.
packet[3]);
1142 kernel.
packet[0] = _mm_unpacklo_epi64(T0, T1);
1143 kernel.
packet[1] = _mm_unpackhi_epi64(T0, T1);
1144 kernel.
packet[2] = _mm_unpacklo_epi64(T2, T3);
1145 kernel.
packet[3] = _mm_unpackhi_epi64(T2, T3);
1150 __m128i T0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
1151 __m128i
T1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
1152 __m128i
T2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
1153 __m128i
T3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
1154 kernel.
packet[0] = _mm_unpacklo_epi16(T0, T2);
1155 kernel.
packet[1] = _mm_unpackhi_epi16(T0, T2);
1156 kernel.
packet[2] = _mm_unpacklo_epi16(T1, T3);
1157 kernel.
packet[3] = _mm_unpackhi_epi16(T1, T3);
1173 __m128i t0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
1174 __m128i t1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
1175 __m128i t2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
1176 __m128i t3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
1177 __m128i t4 = _mm_unpacklo_epi8(kernel.
packet[4], kernel.
packet[5]);
1178 __m128i t5 = _mm_unpackhi_epi8(kernel.
packet[4], kernel.
packet[5]);
1179 __m128i t6 = _mm_unpacklo_epi8(kernel.
packet[6], kernel.
packet[7]);
1180 __m128i t7 = _mm_unpackhi_epi8(kernel.
packet[6], kernel.
packet[7]);
1181 __m128i t8 = _mm_unpacklo_epi8(kernel.
packet[8], kernel.
packet[9]);
1182 __m128i t9 = _mm_unpackhi_epi8(kernel.
packet[8], kernel.
packet[9]);
1183 __m128i ta = _mm_unpacklo_epi8(kernel.
packet[10], kernel.
packet[11]);
1184 __m128i tb = _mm_unpackhi_epi8(kernel.
packet[10], kernel.
packet[11]);
1185 __m128i tc = _mm_unpacklo_epi8(kernel.
packet[12], kernel.
packet[13]);
1186 __m128i td = _mm_unpackhi_epi8(kernel.
packet[12], kernel.
packet[13]);
1187 __m128i te = _mm_unpacklo_epi8(kernel.
packet[14], kernel.
packet[15]);
1188 __m128i tf = _mm_unpackhi_epi8(kernel.
packet[14], kernel.
packet[15]);
1190 __m128i s0 = _mm_unpacklo_epi16(t0, t2);
1191 __m128i s1 = _mm_unpackhi_epi16(t0, t2);
1192 __m128i s2 = _mm_unpacklo_epi16(t1, t3);
1193 __m128i s3 = _mm_unpackhi_epi16(t1, t3);
1194 __m128i s4 = _mm_unpacklo_epi16(t4, t6);
1195 __m128i s5 = _mm_unpackhi_epi16(t4, t6);
1196 __m128i s6 = _mm_unpacklo_epi16(t5, t7);
1197 __m128i s7 = _mm_unpackhi_epi16(t5, t7);
1198 __m128i s8 = _mm_unpacklo_epi16(t8, ta);
1199 __m128i s9 = _mm_unpackhi_epi16(t8, ta);
1200 __m128i sa = _mm_unpacklo_epi16(t9, tb);
1201 __m128i sb = _mm_unpackhi_epi16(t9, tb);
1202 __m128i sc = _mm_unpacklo_epi16(tc, te);
1203 __m128i sd = _mm_unpackhi_epi16(tc, te);
1204 __m128i se = _mm_unpacklo_epi16(td, tf);
1205 __m128i sf = _mm_unpackhi_epi16(td, tf);
1207 __m128i
u0 = _mm_unpacklo_epi32(s0, s4);
1208 __m128i u1 = _mm_unpackhi_epi32(s0, s4);
1209 __m128i
u2 = _mm_unpacklo_epi32(s1, s5);
1210 __m128i u3 = _mm_unpackhi_epi32(s1, s5);
1211 __m128i u4 = _mm_unpacklo_epi32(s2, s6);
1212 __m128i u5 = _mm_unpackhi_epi32(s2, s6);
1213 __m128i u6 = _mm_unpacklo_epi32(s3, s7);
1214 __m128i u7 = _mm_unpackhi_epi32(s3, s7);
1215 __m128i u8 = _mm_unpacklo_epi32(s8, sc);
1216 __m128i u9 = _mm_unpackhi_epi32(s8, sc);
1217 __m128i ua = _mm_unpacklo_epi32(s9, sd);
1218 __m128i ub = _mm_unpackhi_epi32(s9, sd);
1219 __m128i uc = _mm_unpacklo_epi32(sa, se);
1220 __m128i ud = _mm_unpackhi_epi32(sa, se);
1221 __m128i ue = _mm_unpacklo_epi32(sb, sf);
1222 __m128i uf = _mm_unpackhi_epi32(sb, sf);
1224 kernel.
packet[0] = _mm_unpacklo_epi64(u0, u8);
1225 kernel.
packet[1] = _mm_unpackhi_epi64(u0, u8);
1226 kernel.
packet[2] = _mm_unpacklo_epi64(u1, u9);
1227 kernel.
packet[3] = _mm_unpackhi_epi64(u1, u9);
1228 kernel.
packet[4] = _mm_unpacklo_epi64(u2, ua);
1229 kernel.
packet[5] = _mm_unpackhi_epi64(u2, ua);
1230 kernel.
packet[6] = _mm_unpacklo_epi64(u3, ub);
1231 kernel.
packet[7] = _mm_unpackhi_epi64(u3, ub);
1232 kernel.
packet[8] = _mm_unpacklo_epi64(u4, uc);
1233 kernel.
packet[9] = _mm_unpackhi_epi64(u4, uc);
1234 kernel.
packet[10] = _mm_unpacklo_epi64(u5, ud);
1235 kernel.
packet[11] = _mm_unpackhi_epi64(u5, ud);
1236 kernel.
packet[12] = _mm_unpacklo_epi64(u6, ue);
1237 kernel.
packet[13] = _mm_unpackhi_epi64(u6, ue);
1238 kernel.
packet[14] = _mm_unpacklo_epi64(u7, uf);
1239 kernel.
packet[15] = _mm_unpackhi_epi64(u7, uf);
1243 const __m128i
zero = _mm_setzero_si128();
1244 const __m128i select = _mm_set_epi32(ifPacket.
select[3], ifPacket.
select[2], ifPacket.
select[1], ifPacket.
select[0]);
1245 __m128i false_mask = _mm_cmpeq_epi32(select, zero);
1246 #ifdef EIGEN_VECTORIZE_SSE4_1 1247 return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
1249 return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
1253 const __m128
zero = _mm_setzero_ps();
1255 __m128 false_mask = _mm_cmpeq_ps(select, zero);
1256 #ifdef EIGEN_VECTORIZE_SSE4_1 1257 return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
1259 return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
1263 const __m128d
zero = _mm_setzero_pd();
1264 const __m128d select = _mm_set_pd(ifPacket.
select[1], ifPacket.
select[0]);
1265 __m128d false_mask = _mm_cmpeq_pd(select, zero);
1266 #ifdef EIGEN_VECTORIZE_SSE4_1 1267 return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
1269 return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
1274 #ifdef EIGEN_VECTORIZE_FMA 1276 return ::fmaf(a,b,c);
1279 return ::fma(a,b,c);
1298 typedef Packet4h
type;
1300 typedef Packet4h
half;
1303 AlignedOnScalar = 1,
1330 result.x = _mm_set1_pi16(from.x);
1341 __int64_t a64 = _mm_cvtm64_si64(a.x);
1342 __int64_t b64 = _mm_cvtm64_si64(b.x);
1359 result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1364 __int64_t a64 = _mm_cvtm64_si64(a.x);
1365 __int64_t b64 = _mm_cvtm64_si64(b.x);
1382 result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1387 __int64_t a64 = _mm_cvtm64_si64(a.x);
1388 __int64_t b64 = _mm_cvtm64_si64(b.x);
1405 result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1410 __int64_t a64 = _mm_cvtm64_si64(a.x);
1411 __int64_t b64 = _mm_cvtm64_si64(b.x);
1428 result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1434 result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
1440 result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
1445 __int64_t r = _mm_cvtm64_si64(from.x);
1446 *(
reinterpret_cast<__int64_t*
>(to)) = r;
1450 __int64_t r = _mm_cvtm64_si64(from.x);
1451 *(
reinterpret_cast<__int64_t*
>(to)) = r;
1456 return pset1<Packet4h>(*from);
1462 result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
1468 __int64_t a = _mm_cvtm64_si64(from.x);
1469 to[stride*0].x =
static_cast<unsigned short>(
a);
1470 to[stride*1].x =
static_cast<unsigned short>(a >> 16);
1471 to[stride*2].x =
static_cast<unsigned short>(a >> 32);
1472 to[stride*3].x =
static_cast<unsigned short>(a >> 48);
1477 __m64 T0 = _mm_unpacklo_pi16(kernel.
packet[0].x, kernel.
packet[1].x);
1478 __m64
T1 = _mm_unpacklo_pi16(kernel.
packet[2].x, kernel.
packet[3].x);
1479 __m64
T2 = _mm_unpackhi_pi16(kernel.
packet[0].x, kernel.
packet[1].x);
1480 __m64
T3 = _mm_unpackhi_pi16(kernel.
packet[2].x, kernel.
packet[3].x);
1482 kernel.
packet[0].x = _mm_unpacklo_pi32(T0, T1);
1483 kernel.
packet[1].x = _mm_unpackhi_pi32(T0, T1);
1484 kernel.
packet[2].x = _mm_unpacklo_pi32(T2, T3);
1485 kernel.
packet[3].x = _mm_unpackhi_pi32(T2, T3);
// Replacement for the _mm_castpd_ps intrinsic, compiled only when
// EIGEN_COMP_PGI is non-zero and below 1900.
// It reinterprets the storage of the by-value argument as __m128 and returns
// a copy of it: a bit-pattern cast, not a double-to-float value conversion.
// NOTE(review): presumably these PGI releases do not ship the cast
// intrinsics themselves -- confirm against the compiler documentation.
1495 #if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900 1497 static inline __m128 _mm_castpd_ps (__m128d
x) {
return reinterpret_cast<__m128&
>(
x); }
1498 static inline __m128i _mm_castpd_si128(__m128d
x) {
return reinterpret_cast<__m128i&
>(
x); }
1499 static inline __m128d _mm_castps_pd (__m128
x) {
return reinterpret_cast<__m128d&
>(
x); }
1500 static inline __m128i _mm_castps_si128(__m128
x) {
return reinterpret_cast<__m128i&
>(
x); }
1501 static inline __m128 _mm_castsi128_ps(__m128i
x) {
return reinterpret_cast<__m128&
>(
x); }
1502 static inline __m128d _mm_castsi128_pd(__m128i
x) {
return reinterpret_cast<__m128d&
>(
x); }
1505 #endif // EIGEN_PACKET_MATH_SSE_H
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
EIGEN_DEVICE_FUNC void pscatter< int, Packet4i >(int *to, const Packet4i &from, Index stride)
EIGEN_STRONG_INLINE Packet4f pset1frombits< Packet4f >(unsigned int from)
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
#define EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Packet4f pround< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
#define EIGEN_OPTIMIZATION_BARRIER(X)
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
EIGEN_DEVICE_FUNC void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
static const Pose3 T3(Rot3::Rodrigues(-90, 0, 0), Point3(1, 2, 3))
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet2d pset1frombits< Packet2d >(uint64_t from)
EIGEN_STRONG_INLINE Packet4f pfrexp< Packet4f >(const Packet4f &a, Packet4f &exponent)
EIGEN_STRONG_INLINE Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE Packet2d paddsub< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
#define EIGEN_DEBUG_UNALIGNED_LOAD
std::ofstream out("Result.txt")
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE bool predux< Packet16b >(const Packet16b &a)
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
EIGEN_DONT_INLINE Scalar zero()
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
eigen_packet_wrapper< __m128i, 1 > Packet16b
EIGEN_STRONG_INLINE void pbroadcast4< Packet4f >(const float *a, Packet4f &a0, Packet4f &a1, Packet4f &a2, Packet4f &a3)
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
#define EIGEN_DEBUG_ALIGNED_STORE
Namespace containing all symbols from the Eigen library.
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
EIGEN_DEVICE_FUNC void pscatter< bool, Packet16b >(bool *to, const Packet16b &from, Index stride)
EIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE void pstore1< Packet2d >(double *to, const double &a)
EIGEN_STRONG_INLINE Packet16b pand< Packet16b >(const Packet16b &a, const Packet16b &b)
static const Pose3 T2(Rot3::Rodrigues(0.3, 0.2, 0.1), P2)
EIGEN_STRONG_INLINE Packet4f print(const Packet4f &a)
#define EIGEN_DEBUG_UNALIGNED_STORE
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
EIGEN_STRONG_INLINE bool predux_mul< Packet16b >(const Packet16b &a)
EIGEN_STRONG_INLINE Packet2d pldexp< Packet2d >(const Packet2d &a, const Packet2d &exponent)
EIGEN_STRONG_INLINE Packet16b pxor< Packet16b >(const Packet16b &a, const Packet16b &b)
#define EIGEN_DEBUG_ALIGNED_LOAD
EIGEN_STRONG_INLINE Packet16b ploaddup< Packet16b >(const bool *from)
#define vec4i_swizzle1(v, p, q, r, s)
EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
EIGEN_DEVICE_FUNC Packet4i pgather< int, Packet4i >(const int *from, Index stride)
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
#define vec2d_swizzle1(v, p, q)
EIGEN_STRONG_INLINE Packet4f print< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
EIGEN_STRONG_INLINE Packet2d pfrexp< Packet2d >(const Packet2d &a, Packet2d &exponent)
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f pload1< Packet4f >(const float *from)
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE void pstore< bool >(bool *to, const Packet16b &from)
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
EIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet &a, const Packet &b, Op op)
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
EIGEN_DEVICE_FUNC void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
EIGEN_STRONG_INLINE Packet16b padd< Packet16b >(const Packet16b &a, const Packet16b &b)
EIGEN_STRONG_INLINE Packet2d ptrue< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet16b ptrue< Packet16b >(const Packet16b &a)
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
EIGEN_STRONG_INLINE Packet16b pset1< Packet16b >(const bool &from)
unsigned __int64 uint64_t
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pldexp< Packet4f >(const Packet4f &a, const Packet4f &exponent)
EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
Array< double, 1, 3 > e(1./3., 0.5, 2.)
EIGEN_STRONG_INLINE void pstoreu< bool >(bool *to, const Packet16b &from)
EIGEN_STRONG_INLINE Packet16b pload< Packet16b >(const bool *from)
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_DEVICE_FUNC const Scalar & q
EIGEN_STRONG_INLINE void pbroadcast4< Packet2d >(const double *a, Packet2d &a0, Packet2d &a1, Packet2d &a2, Packet2d &a3)
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet16b psub< Packet16b >(const Packet16b &a, const Packet16b &b)
EIGEN_STRONG_INLINE Packet16b por< Packet16b >(const Packet16b &a, const Packet16b &b)
EIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet &a, const Packet &b, Op op)
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
const char * SsePrefetchPtrType
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
EIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet4f paddsub< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
EIGEN_DEVICE_FUNC Packet16b pgather< bool, Packet16b >(const bool *from, Index stride)
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_CONSTEXPR Index size(const T &x)
#define EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet16b ploadquad< Packet16b >(const bool *from)
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f &)
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
static const Similarity3 T1(R, Point3(3.5, -8.2, 4.2), 1)
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE void pstore1< Packet4f >(float *to, const float &a)
EIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet2d pround< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
EIGEN_STRONG_INLINE Packet4i ptrue< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
EIGEN_DEVICE_FUNC Packet2d pgather< double, Packet2d >(const double *from, Index stride)
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
EIGEN_STRONG_INLINE Packet16b ploadu< Packet16b >(const bool *from)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f ptrue< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
EIGEN_STRONG_INLINE void punpackp(Packet4f *vecs)
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet4f pgather< float, Packet4f >(const float *from, Index stride)
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet16b pmul< Packet16b >(const Packet16b &a, const Packet16b &b)
EIGEN_STRONG_INLINE bool pfirst< Packet16b >(const Packet16b &a)
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
#define vec4i_swizzle2(a, b, p, q, r, s)
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)