10 #ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_ 11 #define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_ 18 #if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 20 #define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \ 21 const Packet16f p16f_##NAME = pset1<Packet16f>(X) 23 #define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \ 24 const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X)) 26 #define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \ 27 const Packet8d p8d_##NAME = pset1<Packet8d>(X) 29 #define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \ 30 const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X)) 32 #define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \ 33 const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X) 35 #define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \ 36 const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X)) 61 plog2<Packet8d>(
const Packet8d& _x) {
74 _EIGEN_DECLARE_CONST_Packet16f(1, 1.0
f);
75 _EIGEN_DECLARE_CONST_Packet16f(half, 0.5
f);
76 _EIGEN_DECLARE_CONST_Packet16f(127, 127.0
f);
78 _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950
f);
79 _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949
f);
81 _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341
f);
83 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500
E-4
f);
84 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507
E-3
f);
85 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073
E-3
f);
86 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894
E-2
f);
87 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459
E-1
f);
88 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201
E-1
f);
99 _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453
f);
100 Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
106 y =
pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
107 y1 =
pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
108 y2 =
padd(r, p16f_1);
109 y =
pmadd(y, r, p16f_cephes_exp_p2);
110 y1 =
pmadd(y1, r, p16f_cephes_exp_p5);
111 y =
pmadd(y, r3, y1);
112 y =
pmadd(y, r2, y2);
116 emm0 = _mm512_slli_epi32(emm0, 23);
119 return pmax(
pmul(y, _mm512_castsi512_ps(emm0)), _x);
124 pexp<Packet8d>(
const Packet8d& _x) {
168 __mmask16 denormal_mask = _mm512_kand(
171 _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
179 return _mm512_mask_blend_ps(denormal_mask,
pmul(_x,x), _mm512_setzero_ps());
184 psqrt<Packet8d>(
const Packet8d& _x) {
186 __mmask16 denormal_mask = _mm512_kand(
189 _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
199 return _mm512_mask_blend_pd(denormal_mask,
pmul(_x,x), _mm512_setzero_pd());
204 return _mm512_sqrt_ps(x);
209 return _mm512_sqrt_pd(x);
217 #if defined(EIGEN_VECTORIZE_AVX512ER) 221 return _mm512_rsqrt28_ps(x);
223 #elif EIGEN_FAST_MATH 228 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(
inf, 0x7f800000);
229 _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5
f);
230 _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5
f);
235 __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);
236 __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);
237 __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;
241 Packet16f y_approx = _mm512_rsqrt14_ps(_x);
252 return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
258 _EIGEN_DECLARE_CONST_Packet16f(one, 1.0
f);
259 return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
270 prsqrt<Packet8d>(
const Packet8d& _x) {
271 _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
272 _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
273 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(
inf, 0x7ff0000000000000LL);
278 __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
279 __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
280 __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;
284 #if defined(EIGEN_VECTORIZE_AVX512ER) 285 Packet8d y_approx = _mm512_rsqrt28_pd(_x);
287 Packet8d y_approx = _mm512_rsqrt14_pd(_x);
297 #if !defined(EIGEN_VECTORIZE_AVX512ER) 298 y_newton =
pmul(y_newton,
pmadd(y_newton,
pmul(neg_half, y_newton), p8d_one_point_five));
303 return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
308 _EIGEN_DECLARE_CONST_Packet8d(one, 1.0
f);
309 return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
362 #endif // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_ EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h &a)
EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf &a)
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog2_double(const Packet _x)
#define EIGEN_STRONG_INLINE
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
std::ofstream out("Result.txt")
EIGEN_STRONG_INLINE Packet16f pset1< Packet16f >(const float &from)
Namespace containing all symbols from the Eigen library.
T generic_fast_tanh_float(const T &a_x)
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_STRONG_INLINE Packet16f pfrexp< Packet16f >(const Packet16f &a, Packet16f &exponent)
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)
EIGEN_STRONG_INLINE Packet8d pset1< Packet8d >(const double &from)
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
eigen_packet_wrapper< __m256i, 2 > Packet16bf
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_float(const Packet _x)
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pexp_double(const Packet _x)
Packet generic_plog1p(const Packet &x)
eigen_packet_wrapper< __m256i, 1 > Packet16h
Point2(* f)(const Point3 &, OptionalJacobian< 2, 3 >)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet psin_float(const Packet &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_double(const Packet _x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f ptanh< Packet16f >(const Packet16f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f psin< Packet16f >(const Packet16f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f pcos< Packet16f >(const Packet16f &_x)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog2_float(const Packet _x)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f &a)
Packet generic_expm1(const Packet &x)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pcos_float(const Packet &x)
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)
EIGEN_STRONG_INLINE Packet16f pldexp< Packet16f >(const Packet16f &a, const Packet16f &exponent)