10 #ifndef EIGEN_MATH_FUNCTIONS_AVX_H 11 #define EIGEN_MATH_FUNCTIONS_AVX_H 23 #ifdef EIGEN_VECTORIZE_AVX2 24 return _mm256_slli_epi32(v, n);
26 __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
27 __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
28 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
34 #ifdef EIGEN_VECTORIZE_AVX2 35 return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
37 __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
38 __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
39 return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
65 Packet8f shift = _mm256_floor_ps(
padd(z, p8f_one_over_four));
66 x =
pmadd(shift, p8f_neg_pi_first, x);
67 x =
pmadd(shift, p8f_neg_pi_second, x);
68 x =
pmadd(shift, p8f_neg_pi_third, x);
69 z =
pmul(x, p8f_four_over_pi);
73 Packet8i shift_ints = _mm256_cvtps_epi32(shift);
74 Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
79 Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
89 right =
pmadd(right, z_minus_two2, p8f_coeff_right_2);
90 right =
pmadd(right, z_minus_two2, p8f_coeff_right_0);
99 left =
pmadd(left, z2, p8f_coeff_left_3);
100 left =
pmadd(left, z2, p8f_coeff_left_1);
101 left =
pmul(left, z);
104 left = _mm256_andnot_ps(ival_mask, left);
105 right = _mm256_and_ps(ival_mask, right);
109 res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
147 Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ);
148 Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
151 x =
pmax(x, p8f_min_norm_pos);
154 Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
157 x = _mm256_and_ps(x, p8f_inv_mant_mask);
158 x = _mm256_or_ps(x, p8f_half);
167 Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
168 Packet8f tmp = _mm256_and_ps(x, mask);
170 e =
psub(e, _mm256_and_ps(p8f_1, mask));
179 y =
pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
180 y1 =
pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
181 y2 =
pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
182 y =
pmadd(y, x, p8f_cephes_log_p2);
183 y1 =
pmadd(y1, x, p8f_cephes_log_p5);
184 y2 =
pmadd(y2, x, p8f_cephes_log_p8);
185 y =
pmadd(y, x3, y1);
186 y =
pmadd(y, x3, y2);
190 y1 =
pmul(e, p8f_cephes_log_q1);
191 tmp =
pmul(x2, p8f_half);
194 y2 =
pmul(e, p8f_cephes_log_q2);
200 _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
201 _mm256_and_ps(iszero_mask, p8f_minus_inf));
231 Packet8f m = _mm256_floor_ps(
pmadd(x, p8f_cephes_LOG2EF, p8f_half));
237 #ifdef EIGEN_VECTORIZE_FMA 239 Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
244 r =
psub(r,
pmul(m, p8f_cephes_exp_C2));
252 y =
pmadd(y, r, p8f_cephes_exp_p1);
253 y =
pmadd(y, r, p8f_cephes_exp_p2);
254 y =
pmadd(y, r, p8f_cephes_exp_p3);
255 y =
pmadd(y, r, p8f_cephes_exp_p4);
256 y =
pmadd(y, r, p8f_cephes_exp_p5);
265 return pmax(
pmul(y, _mm256_castsi256_ps(emm0)), _x);
305 x =
pmax(
pmin(x, p4d_exp_hi), p4d_exp_lo);
307 fx =
pmadd(p4d_cephes_LOG2EF, x, p4d_half);
310 fx = _mm256_floor_pd(fx);
315 tmp =
pmul(fx, p4d_cephes_exp_C1);
324 px =
pmadd(px, x2, p4d_cephes_exp_p1);
325 px =
pmadd(px, x2, p4d_cephes_exp_p2);
330 qx =
pmadd(qx, x2, p4d_cephes_exp_q1);
331 qx =
pmadd(qx, x2, p4d_cephes_exp_q2);
332 qx =
pmadd(qx, x2, p4d_cephes_exp_q3);
337 x = _mm256_div_pd(px,
psub(qx, px));
338 x =
pmadd(p4d_2, x, p4d_1);
342 __m128i emm0 = _mm256_cvtpd_epi32(fx);
343 emm0 = _mm_add_epi32(emm0, p4i_1023);
344 emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
345 __m128i lo = _mm_slli_epi64(emm0, 52);
346 __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
347 __m256i
e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
348 e = _mm256_insertf128_si256(e, hi, 1);
352 return pmax(
pmul(x, _mm256_castsi256_pd(e)), _x);
368 Packet8f denormal_mask = _mm256_and_ps(
371 _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
378 return _mm256_andnot_ps(denormal_mask,
pmul(_x,x));
383 return _mm256_sqrt_ps(
x);
388 return _mm256_sqrt_pd(
x);
404 Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
405 Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
408 Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
409 Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
410 Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
411 _mm256_and_ps(zero_mask, p8f_inf));
414 x =
pmul(x,
pmadd(neg_half,
pmul(x, x), p8f_one_point_five));
417 return _mm256_or_ps(x, infs_and_nans);
424 return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(
x));
431 return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(
x));
439 #endif // EIGEN_MATH_FUNCTIONS_AVX_H
static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f ptanh< Packet8f >(const Packet8f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d pexp< Packet4d >(const Packet4d &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d psqrt< Packet4d >(const Packet4d &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f prsqrt< Packet8f >(const Packet8f &x)
Namespace containing all symbols from the Eigen library.
Pose3 x2(Rot3::Ypr(0.0, 0.0, 0.0), l2)
T generic_fast_tanh_float(const T &a_x)
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
#define _EIGEN_DECLARE_CONST_Packet8i(NAME, X)
#define _EIGEN_DECLARE_CONST_Packet8f(NAME, X)
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f psin< Packet8f >(const Packet8f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f plog< Packet8f >(const Packet8f &_x)
#define _EIGEN_DECLARE_CONST_Packet4d(NAME, X)
RealScalar RealScalar * px
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d prsqrt< Packet4d >(const Packet4d &x)
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
__vector short int Packet8i
Array< double, 1, 3 > e(1./3., 0.5, 2.)
#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME, X)
EIGEN_STRONG_INLINE Packet8f pset1< Packet8f >(const float &from)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f pexp< Packet8f >(const Packet8f &_x)
Pose3 x3(Rot3::Ypr(M_PI/4.0, 0.0, 0.0), l2)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f psqrt< Packet8f >(const Packet8f &x)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Packet8f pshiftright(Packet8f v, int n)
Packet8i pshiftleft(Packet8i v, int n)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)