15 #ifndef EIGEN_MATH_FUNCTIONS_SSE_H    16 #define EIGEN_MATH_FUNCTIONS_SSE_H    55   Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); 
    56   Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
    58   x = 
pmax(x, p4f_min_norm_pos);  
    59   emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
    62   x = _mm_and_ps(x, p4f_inv_mant_mask);
    63   x = _mm_or_ps(x, p4f_half);
    65   emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
    74   Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
    84   y  = 
pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
    85   y1 = 
pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
    86   y2 = 
pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
    87   y  = 
pmadd(y , x, p4f_cephes_log_p2);
    88   y1 = 
pmadd(y1, x, p4f_cephes_log_p5);
    89   y2 = 
pmadd(y2, x, p4f_cephes_log_p8);
    94   y1 = 
pmul(e, p4f_cephes_log_q1);
    95   tmp = 
pmul(x2, p4f_half);
    98   y2 = 
pmul(e, p4f_cephes_log_q2);
   102   return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
   103                    _mm_and_ps(iszero_mask, p4f_minus_inf));
   133   x = 
pmax(
pmin(x, p4f_exp_hi), p4f_exp_lo);
   136   fx = 
pmadd(x, p4f_cephes_LOG2EF, p4f_half);
   138 #ifdef EIGEN_VECTORIZE_SSE4_1   139   fx = _mm_floor_ps(fx);
   141   emm0 = _mm_cvttps_epi32(fx);
   142   tmp  = _mm_cvtepi32_ps(emm0);
   144   Packet4f mask = _mm_cmpgt_ps(tmp, fx);
   145   mask = _mm_and_ps(mask, p4f_1);
   146   fx = 
psub(tmp, mask);
   149   tmp = 
pmul(fx, p4f_cephes_exp_C1);
   157   y = 
pmadd(y, x, p4f_cephes_exp_p1);
   158   y = 
pmadd(y, x, p4f_cephes_exp_p2);
   159   y = 
pmadd(y, x, p4f_cephes_exp_p3);
   160   y = 
pmadd(y, x, p4f_cephes_exp_p4);
   161   y = 
pmadd(y, x, p4f_cephes_exp_p5);
   166   emm0 = _mm_cvttps_epi32(fx);
   167   emm0 = _mm_add_epi32(emm0, p4i_0x7f);
   168   emm0 = _mm_slli_epi32(emm0, 23);
   196   static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
   202   x = 
pmax(
pmin(x, p2d_exp_hi), p2d_exp_lo);
   204   fx = 
pmadd(p2d_cephes_LOG2EF, x, p2d_half);
   206 #ifdef EIGEN_VECTORIZE_SSE4_1   207   fx = _mm_floor_pd(fx);
   209   emm0 = _mm_cvttpd_epi32(fx);
   210   tmp  = _mm_cvtepi32_pd(emm0);
   212   Packet2d mask = _mm_cmpgt_pd(tmp, fx);
   213   mask = _mm_and_pd(mask, p2d_1);
   214   fx = 
psub(tmp, mask);
   217   tmp = 
pmul(fx, p2d_cephes_exp_C1);
   225   px = 
pmadd(px, x2, p2d_cephes_exp_p1);
   226   px = 
pmadd(px, x2, p2d_cephes_exp_p2);
   230   qx = 
pmadd(qx, x2, p2d_cephes_exp_q1);
   231   qx = 
pmadd(qx, x2, p2d_cephes_exp_q2);
   232   qx = 
pmadd(qx, x2, p2d_cephes_exp_q3);
   235   x = 
pmadd(p2d_2,x,p2d_1);
   238   emm0 = _mm_cvttpd_epi32(fx);
   239   emm0 = _mm_add_epi32(emm0, p4i_1023_0);
   240   emm0 = _mm_slli_epi32(emm0, 20);
   241   emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
   282   Packet4f xmm1, xmm2, xmm3, sign_bit, y;
   292   sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
   295   y = 
pmul(x, p4f_cephes_FOPI);
   298   emm2 = _mm_cvttps_epi32(y);
   300   emm2 = _mm_add_epi32(emm2, p4i_1);
   301   emm2 = _mm_and_si128(emm2, p4i_not1);
   302   y = _mm_cvtepi32_ps(emm2);
   304   emm0 = _mm_and_si128(emm2, p4i_4);
   305   emm0 = _mm_slli_epi32(emm0, 29);
   312   emm2 = _mm_and_si128(emm2, p4i_2);
   313   emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
   315   Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
   316   Packet4f poly_mask = _mm_castsi128_ps(emm2);
   317   sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
   321   xmm1 = 
pmul(y, p4f_minus_cephes_DP1);
   322   xmm2 = 
pmul(y, p4f_minus_cephes_DP2);
   323   xmm3 = 
pmul(y, p4f_minus_cephes_DP3);
   332   y = 
pmadd(y, z, p4f_coscof_p1);
   333   y = 
pmadd(y, z, p4f_coscof_p2);
   343   y2 = 
pmadd(y2, z, p4f_sincof_p1);
   344   y2 = 
pmadd(y2, z, p4f_sincof_p2);
   350   y2 = _mm_and_ps(poly_mask, y2);
   351   y = _mm_andnot_ps(poly_mask, y);
   354   return _mm_xor_ps(y, sign_bit);
   387   y = 
pmul(x, p4f_cephes_FOPI);
   390   emm2 = _mm_cvttps_epi32(y);
   392   emm2 = _mm_add_epi32(emm2, p4i_1);
   393   emm2 = _mm_and_si128(emm2, p4i_not1);
   394   y = _mm_cvtepi32_ps(emm2);
   396   emm2 = _mm_sub_epi32(emm2, p4i_2);
   399   emm0 = _mm_andnot_si128(emm2, p4i_4);
   400   emm0 = _mm_slli_epi32(emm0, 29);
   402   emm2 = _mm_and_si128(emm2, p4i_2);
   403   emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
   405   Packet4f sign_bit = _mm_castsi128_ps(emm0);
   406   Packet4f poly_mask = _mm_castsi128_ps(emm2);
   410   xmm1 = 
pmul(y, p4f_minus_cephes_DP1);
   411   xmm2 = 
pmul(y, p4f_minus_cephes_DP2);
   412   xmm3 = 
pmul(y, p4f_minus_cephes_DP3);
   421   y = 
pmadd(y,z,p4f_coscof_p1);
   422   y = 
pmadd(y,z,p4f_coscof_p2);
   425   Packet4f tmp = _mm_mul_ps(z, p4f_half);
   431   y2 = 
pmadd(y2, z, p4f_sincof_p1);
   432   y2 = 
pmadd(y2, z, p4f_sincof_p2);
   434   y2 = 
pmadd(y2, x, x);
   437   y2 = _mm_and_ps(poly_mask, y2);
   438   y  = _mm_andnot_ps(poly_mask, y);
   442   return _mm_xor_ps(y, sign_bit);
   459   Packet4f denormal_mask = _mm_and_ps(
   460       _mm_cmpge_ps(_x, _mm_setzero_ps()),
   468   return _mm_andnot_ps(denormal_mask, 
pmul(_x,x));
   495   Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
   496   Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
   499   Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
   500   Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
   501   Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
   502                                      _mm_and_ps(zero_mask, p4f_inf));
   505   x = 
pmul(x, 
pmadd(neg_half, 
pmul(x, x), p4f_one_point_five));
   508   return _mm_or_ps(x, infs_and_nans);
   549 #if EIGEN_COMP_GNUC_STRICT   562 #endif // EIGEN_MATH_FUNCTIONS_SSE_H static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f)
#define EIGEN_ALWAYS_INLINE
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d psqrt< Packet2d >(const Packet2d &x)
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psin< Packet4f >(const Packet4f &_x)
EIGEN_DEVICE_FUNC const SqrtReturnType sqrt() const
T generic_fast_tanh_float(const T &a_x)
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh< Packet4f >(const Packet4f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f plog< Packet4f >(const Packet4f &_x)
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type pfirst(const Packet &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pcos< Packet4f >(const Packet4f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f prsqrt< Packet4f >(const Packet4f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d prsqrt< Packet2d >(const Packet2d &x)
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psqrt< Packet4f >(const Packet4f &x)
static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f pexp< Packet4f >(const Packet4f &_x)
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp< Packet2d >(const Packet2d &_x)
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0)
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pand(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)