15 #ifndef EIGEN_MATH_FUNCTIONS_SSE_H 16 #define EIGEN_MATH_FUNCTIONS_SSE_H 55 Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
56 Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
58 x =
pmax(x, p4f_min_norm_pos);
59 emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
62 x = _mm_and_ps(x, p4f_inv_mant_mask);
63 x = _mm_or_ps(x, p4f_half);
65 emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
74 Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
84 y =
pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
85 y1 =
pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
86 y2 =
pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
87 y =
pmadd(y , x, p4f_cephes_log_p2);
88 y1 =
pmadd(y1, x, p4f_cephes_log_p5);
89 y2 =
pmadd(y2, x, p4f_cephes_log_p8);
94 y1 =
pmul(e, p4f_cephes_log_q1);
95 tmp =
pmul(x2, p4f_half);
98 y2 =
pmul(e, p4f_cephes_log_q2);
102 return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
103 _mm_and_ps(iszero_mask, p4f_minus_inf));
133 x =
pmax(
pmin(x, p4f_exp_hi), p4f_exp_lo);
136 fx =
pmadd(x, p4f_cephes_LOG2EF, p4f_half);
138 #ifdef EIGEN_VECTORIZE_SSE4_1 139 fx = _mm_floor_ps(fx);
141 emm0 = _mm_cvttps_epi32(fx);
142 tmp = _mm_cvtepi32_ps(emm0);
144 Packet4f mask = _mm_cmpgt_ps(tmp, fx);
145 mask = _mm_and_ps(mask, p4f_1);
146 fx =
psub(tmp, mask);
149 tmp =
pmul(fx, p4f_cephes_exp_C1);
157 y =
pmadd(y, x, p4f_cephes_exp_p1);
158 y =
pmadd(y, x, p4f_cephes_exp_p2);
159 y =
pmadd(y, x, p4f_cephes_exp_p3);
160 y =
pmadd(y, x, p4f_cephes_exp_p4);
161 y =
pmadd(y, x, p4f_cephes_exp_p5);
166 emm0 = _mm_cvttps_epi32(fx);
167 emm0 = _mm_add_epi32(emm0, p4i_0x7f);
168 emm0 = _mm_slli_epi32(emm0, 23);
196 static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
202 x =
pmax(
pmin(x, p2d_exp_hi), p2d_exp_lo);
204 fx =
pmadd(p2d_cephes_LOG2EF, x, p2d_half);
206 #ifdef EIGEN_VECTORIZE_SSE4_1 207 fx = _mm_floor_pd(fx);
209 emm0 = _mm_cvttpd_epi32(fx);
210 tmp = _mm_cvtepi32_pd(emm0);
212 Packet2d mask = _mm_cmpgt_pd(tmp, fx);
213 mask = _mm_and_pd(mask, p2d_1);
214 fx =
psub(tmp, mask);
217 tmp =
pmul(fx, p2d_cephes_exp_C1);
225 px =
pmadd(px, x2, p2d_cephes_exp_p1);
226 px =
pmadd(px, x2, p2d_cephes_exp_p2);
230 qx =
pmadd(qx, x2, p2d_cephes_exp_q1);
231 qx =
pmadd(qx, x2, p2d_cephes_exp_q2);
232 qx =
pmadd(qx, x2, p2d_cephes_exp_q3);
235 x =
pmadd(p2d_2,x,p2d_1);
238 emm0 = _mm_cvttpd_epi32(fx);
239 emm0 = _mm_add_epi32(emm0, p4i_1023_0);
240 emm0 = _mm_slli_epi32(emm0, 20);
241 emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
292 sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
295 y =
pmul(x, p4f_cephes_FOPI);
298 emm2 = _mm_cvttps_epi32(y);
300 emm2 = _mm_add_epi32(emm2, p4i_1);
301 emm2 = _mm_and_si128(emm2, p4i_not1);
302 y = _mm_cvtepi32_ps(emm2);
304 emm0 = _mm_and_si128(emm2, p4i_4);
305 emm0 = _mm_slli_epi32(emm0, 29);
312 emm2 = _mm_and_si128(emm2, p4i_2);
313 emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
315 Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
316 Packet4f poly_mask = _mm_castsi128_ps(emm2);
317 sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
321 xmm1 =
pmul(y, p4f_minus_cephes_DP1);
322 xmm2 =
pmul(y, p4f_minus_cephes_DP2);
323 xmm3 =
pmul(y, p4f_minus_cephes_DP3);
332 y =
pmadd(y, z, p4f_coscof_p1);
333 y =
pmadd(y, z, p4f_coscof_p2);
343 y2 =
pmadd(y2, z, p4f_sincof_p1);
344 y2 =
pmadd(y2, z, p4f_sincof_p2);
350 y2 = _mm_and_ps(poly_mask, y2);
351 y = _mm_andnot_ps(poly_mask, y);
354 return _mm_xor_ps(y, sign_bit);
387 y =
pmul(x, p4f_cephes_FOPI);
390 emm2 = _mm_cvttps_epi32(y);
392 emm2 = _mm_add_epi32(emm2, p4i_1);
393 emm2 = _mm_and_si128(emm2, p4i_not1);
394 y = _mm_cvtepi32_ps(emm2);
396 emm2 = _mm_sub_epi32(emm2, p4i_2);
399 emm0 = _mm_andnot_si128(emm2, p4i_4);
400 emm0 = _mm_slli_epi32(emm0, 29);
402 emm2 = _mm_and_si128(emm2, p4i_2);
403 emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
405 Packet4f sign_bit = _mm_castsi128_ps(emm0);
406 Packet4f poly_mask = _mm_castsi128_ps(emm2);
410 xmm1 =
pmul(y, p4f_minus_cephes_DP1);
411 xmm2 =
pmul(y, p4f_minus_cephes_DP2);
412 xmm3 =
pmul(y, p4f_minus_cephes_DP3);
421 y =
pmadd(y,z,p4f_coscof_p1);
422 y =
pmadd(y,z,p4f_coscof_p2);
425 Packet4f tmp = _mm_mul_ps(z, p4f_half);
431 y2 =
pmadd(y2, z, p4f_sincof_p1);
432 y2 =
pmadd(y2, z, p4f_sincof_p2);
434 y2 =
pmadd(y2, x, x);
437 y2 = _mm_and_ps(poly_mask, y2);
438 y = _mm_andnot_ps(poly_mask, y);
442 return _mm_xor_ps(y, sign_bit);
459 Packet4f denormal_mask = _mm_and_ps(
460 _mm_cmpge_ps(_x, _mm_setzero_ps()),
468 return _mm_andnot_ps(denormal_mask,
pmul(_x,x));
495 Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
496 Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
499 Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
500 Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
501 Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
502 _mm_and_ps(zero_mask, p4f_inf));
505 x =
pmul(x,
pmadd(neg_half,
pmul(x, x), p4f_one_point_five));
508 return _mm_or_ps(x, infs_and_nans);
549 #if EIGEN_COMP_GNUC_STRICT 562 #endif // EIGEN_MATH_FUNCTIONS_SSE_H static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f)
#define EIGEN_ALWAYS_INLINE
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d psqrt< Packet2d >(const Packet2d &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psin< Packet4f >(const Packet4f &_x)
EIGEN_DEVICE_FUNC const SqrtReturnType sqrt() const
Namespace containing all symbols from the Eigen library.
Pose3 x2(Rot3::Ypr(0.0, 0.0, 0.0), l2)
T generic_fast_tanh_float(const T &a_x)
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh< Packet4f >(const Packet4f &x)
RealScalar RealScalar * px
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog< Packet4f >(const Packet4f &_x)
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type pfirst(const Packet &a)
Array< double, 1, 3 > e(1./3., 0.5, 2.)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pcos< Packet4f >(const Packet4f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f prsqrt< Packet4f >(const Packet4f &x)
Pose3 x3(Rot3::Ypr(M_PI/4.0, 0.0, 0.0), l2)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d prsqrt< Packet2d >(const Packet2d &x)
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psqrt< Packet4f >(const Packet4f &x)
static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp< Packet4f >(const Packet4f &_x)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp< Packet2d >(const Packet2d &_x)
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0)
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pand(const Packet &a, const Packet &b)
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask,~0x7f800000)
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)