10 #ifndef EIGEN_MATH_FUNCTIONS_AVX_H    11 #define EIGEN_MATH_FUNCTIONS_AVX_H    23 #ifdef EIGEN_VECTORIZE_AVX2    24   return _mm256_slli_epi32(v, n);
    26   __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
    27   __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
    28   return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
    34 #ifdef EIGEN_VECTORIZE_AVX2    35   return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
    37   __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
    38   __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
    39   return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
// psin<Packet8f> interior (excerpt): Cephes-style vectorized sine.
// NOTE(review): the function header, the p8f_* constant declarations,
// the derivation of sign_flip_mask and the final return are on lines
// not visible in this excerpt.
// Range reduction: shift = floor(z + 1/4) selects the quarter-period.
    65   Packet8f shift = _mm256_floor_ps(
padd(z, p8f_one_over_four));
// Subtract shift*pi in three split-constant steps (Cody-Waite style)
// so the reduction stays accurate in single precision.
    66   x = 
pmadd(shift, p8f_neg_pi_first, x);
    67   x = 
pmadd(shift, p8f_neg_pi_second, x);
    68   x = 
pmadd(shift, p8f_neg_pi_third, x);
// Rescale the reduced argument by 4/pi.
    69   z = 
pmul(x, p8f_four_over_pi);
// Record which lanes landed on an odd quarter-period; those need a
// sign flip at the end.
    73   Packet8i shift_ints = _mm256_cvtps_epi32(shift);
    74   Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
// ival_mask selects between the two polynomial branches below (z > 1).
    79   Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
// Right branch: even polynomial in (z-2)^2, Horner evaluation.
    88   Packet8f right = 
pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
    89   right = 
pmadd(right, z_minus_two2, p8f_coeff_right_2);
    90   right = 
pmadd(right, z_minus_two2, p8f_coeff_right_0);
// Left branch: odd polynomial in z, Horner evaluation.
    98   Packet8f left = 
pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
    99   left = 
pmadd(left, z2, p8f_coeff_left_3);
   100   left = 
pmadd(left, z2, p8f_coeff_left_1);
   101   left = 
pmul(left, z);
// Blend the two branches lane-wise with the interval mask.
   104   left = _mm256_andnot_ps(ival_mask, left);
   105   right = _mm256_and_ps(ival_mask, right);
   106   Packet8f res = _mm256_or_ps(left, right);
// Flip the sign bit for odd quarter-periods (sign_flip_mask is built
// from shift_isodd on lines not shown here).
   109   res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
// plog<Packet8f> interior (excerpt): Cephes-style natural logarithm.
// NOTE(review): the header, p8f_* constants, the emm0 exponent
// extraction and several combining statements fall outside this excerpt.
// _CMP_NGE_UQ is true for x < 0 and for NaN: both are invalid for log.
   147   Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ); 
   148   Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
// Clamp denormals/zero up to the smallest normalized float so the
// exponent extraction below is well defined.
   151   x = 
pmax(x, p8f_min_norm_pos);
// Exponent term: emm0 (extracted from the float bit pattern on lines
// not shown) minus 126 — one less than the IEEE bias of 127, which is
// consistent with normalizing the mantissa into [0.5, 1) below.
   154   Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
// Drop the exponent bits and force the mantissa into [0.5, 1).
   157   x = _mm256_and_ps(x, p8f_inv_mant_mask);
   158   x = _mm256_or_ps(x, p8f_half);
// If x < sqrt(1/2), decrement the exponent and fold the mantissa so
// the polynomial argument stays near zero.
   167   Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
   168   Packet8f tmp = _mm256_and_ps(x, mask);
   170   e = 
psub(e, _mm256_and_ps(p8f_1, mask));
// Minimax polynomial, evaluated as three interleaved Horner chains
// (y, y1, y2) to expose instruction-level parallelism.
   179   y = 
pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
   180   y1 = 
pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
   181   y2 = 
pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
   182   y = 
pmadd(y, x, p8f_cephes_log_p2);
   183   y1 = 
pmadd(y1, x, p8f_cephes_log_p5);
   184   y2 = 
pmadd(y2, x, p8f_cephes_log_p8);
   185   y = 
pmadd(y, x3, y1);
   186   y = 
pmadd(y, x3, y2);
// Exponent contribution e*ln(2) split into two constants (q1, q2) for
// extra precision; tmp = x^2/2 is combined on lines not shown.
   190   y1 = 
pmul(e, p8f_cephes_log_q1);
   191   tmp = 
pmul(x2, p8f_half);
   194   y2 = 
pmul(e, p8f_cephes_log_q2);
// Special cases: log(0) = -inf; negative or NaN input propagates NaN
// through the OR with invalid_mask.
   200       _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
   201       _mm256_and_ps(iszero_mask, p8f_minus_inf));
// pexp<Packet8f> interior (excerpt): exp(x) = 2^m * exp(r).
// m = round(x / ln(2)), computed as floor(x*log2(e) + 1/2).
   231   Packet8f m = _mm256_floor_ps(
pmadd(x, p8f_cephes_LOG2EF, p8f_half));
// r = x - m*ln(2). With FMA this is one fused operation (p8f_nln2 is
// presumably -ln(2) — confirm against the constant declaration, not
// shown). The non-FMA path subtracts ln(2) split into C1 and C2; the
// C1 step is outside this excerpt.
   237 #ifdef EIGEN_VECTORIZE_FMA   239   Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
   244   r = 
psub(r, 
pmul(m, p8f_cephes_exp_C2));
// Polynomial approximation of exp(r) on the reduced range, Horner
// scheme (the leading coefficient is applied on a line not shown).
   252   y = 
pmadd(y, r, p8f_cephes_exp_p1);
   253   y = 
pmadd(y, r, p8f_cephes_exp_p2);
   254   y = 
pmadd(y, r, p8f_cephes_exp_p3);
   255   y = 
pmadd(y, r, p8f_cephes_exp_p4);
   256   y = 
pmadd(y, r, p8f_cephes_exp_p5);
// Scale by 2^m; emm0 holds reconstructed exponent bits built on lines
// not shown. The final pmax with the original argument looks like a
// special-case guard (e.g. propagating huge inputs) — TODO confirm
// the exact intent against the full source.
   265   return pmax(
pmul(y, _mm256_castsi256_ps(emm0)), _x);
// pexp<Packet4d> interior (excerpt): double-precision exp via a Pade
// approximant, exp(r) ~= 1 + 2*px/(qx - px), scaled by 2^fx.
// Clamp the argument into the range where the result is finite.
   305   x = 
pmax(
pmin(x, p4d_exp_hi), p4d_exp_lo);
// fx = round(x / ln(2)) computed as floor(x*log2(e) + 1/2).
   307   fx = 
pmadd(p4d_cephes_LOG2EF, x, p4d_half);
   310   fx = _mm256_floor_pd(fx);
// Reduce: subtract fx*ln(2) in two parts (C1 here; the C2 step is
// outside this excerpt).
   315   tmp = 
pmul(fx, p4d_cephes_exp_C1);
// Numerator polynomial px in x^2, Horner scheme (leading term applied
// on a line not shown).
   324   px = 
pmadd(px, x2, p4d_cephes_exp_p1);
   325   px = 
pmadd(px, x2, p4d_cephes_exp_p2);
// Denominator polynomial qx in x^2, Horner scheme (leading term not
// shown).
   330   qx = 
pmadd(qx, x2, p4d_cephes_exp_q1);
   331   qx = 
pmadd(qx, x2, p4d_cephes_exp_q2);
   332   qx = 
pmadd(qx, x2, p4d_cephes_exp_q3);
// Rational combination: x <- 1 + 2 * px/(qx - px).
   337   x = _mm256_div_pd(px, 
psub(qx, px));
   338   x = 
pmadd(p4d_2, x, p4d_1);
// Build 2^fx by writing fx + 1023 (the double exponent bias) into the
// exponent field (bit 52) of each 64-bit lane. AVX1 has no 256-bit
// integer shifts, so the work is done on 128-bit halves: the shuffle
// pairs each 32-bit exponent with its target lane before the 64-bit
// shifts, then the halves are recombined.
   342   __m128i emm0 = _mm256_cvtpd_epi32(fx);
   343   emm0 = _mm_add_epi32(emm0, p4i_1023);
   344   emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
   345   __m128i lo = _mm_slli_epi64(emm0, 52);
   346   __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
   347   __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
   348   e = _mm256_insertf128_si256(e, hi, 1);
// Scale and return; the pmax with the original argument is presumably
// a special-case guard — TODO confirm against the full source.
   352   return pmax(
pmul(x, _mm256_castsi256_pd(e)), _x);
// psqrt excerpts (fragments of three separate specializations).
// Lines 368-378: fast-math float path — sqrt(x) computed as
// x * rsqrt(x) (the rsqrt/refinement lines are not shown), with
// denormal/zero inputs masked to zero via denormal_mask (its first
// operand, a denormal test, is on a line not shown).
   368   Packet8f denormal_mask = _mm256_and_ps(
   371       _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
   378   return _mm256_andnot_ps(denormal_mask, 
pmul(_x,x));
// Exact fallbacks: psqrt<Packet8f> and psqrt<Packet4d> simply use the
// hardware square-root instruction.
   383   return _mm256_sqrt_ps(x);
   388   return _mm256_sqrt_pd(x);
// prsqrt<Packet8f> fast path (excerpt): hardware rsqrt estimate plus
// one Newton-Raphson refinement step, with IEEE special cases patched
// in afterwards.
// Zero out the estimate for arguments below FLT_MIN, where rsqrt's
// result is not usable.
   404   Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
   405   Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
// Special cases: rsqrt(negative) = NaN, rsqrt(0 or subnormal) = +inf.
   408   Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
   409   Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
   410   Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
   411                                         _mm256_and_ps(zero_mask, p8f_inf));
// Newton-Raphson step: x <- x * (1.5 + neg_half * x^2), where
// neg_half presumably holds -0.5*_x (computed on a line not shown) —
// TODO confirm against the full source.
   414   x = 
pmul(x, 
pmadd(neg_half, 
pmul(x, x), p8f_one_point_five));
// Merge the special-case lanes into the refined estimate (the two
// sets of lanes are disjoint, so OR suffices).
   417   return _mm256_or_ps(x, infs_and_nans);
// Exact fallback bodies for prsqrt<Packet8f> and prsqrt<Packet4d>:
// 1 / sqrt(x) via a full-precision division.
   424   return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
   431   return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
   439 #endif  // EIGEN_MATH_FUNCTIONS_AVX_H static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f ptanh< Packet8f >(const Packet8f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d pexp< Packet4d >(const Packet4d &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d psqrt< Packet4d >(const Packet4d &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f prsqrt< Packet8f >(const Packet8f &x)
T generic_fast_tanh_float(const T &a_x)
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
#define _EIGEN_DECLARE_CONST_Packet8i(NAME, X)
#define _EIGEN_DECLARE_CONST_Packet8f(NAME, X)
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f psin< Packet8f >(const Packet8f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f plog< Packet8f >(const Packet8f &_x)
#define _EIGEN_DECLARE_CONST_Packet4d(NAME, X)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d prsqrt< Packet4d >(const Packet4d &x)
__vector short int Packet8i
#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME, X)
EIGEN_STRONG_INLINE Packet8f pset1< Packet8f >(const float &from)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f pexp< Packet8f >(const Packet8f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f psqrt< Packet8f >(const Packet8f &x)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Packet8f pshiftright(Packet8f v, int n)
Packet8i pshiftleft(Packet8i v, int n)
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)