11 #ifndef EIGEN_COMPLEX_NEON_H 12 #define EIGEN_COMPLEX_NEON_H 21 uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
24 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
25 return vld1q_u32( conj_XOR_DATA );
30 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
31 return vld1_u32( conj_XOR_DATA );
70 r64 = vld1_f32((
float *)&from);
89 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
91 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
93 v1 = vmulq_f32(v1,
b.v);
95 v2 = vmulq_f32(v2,
b.v);
97 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2),
p4ui_CONJ_XOR()));
106 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(
b.v))));
110 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(
b.v))));
114 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(
b.v))));
118 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(
b.v))));
129 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>,
Packet2cf>(
const std::complex<float>* from,
Index stride)
132 res = vsetq_lane_f32(
std::real(from[0*stride]), res, 0);
133 res = vsetq_lane_f32(
std::imag(from[0*stride]), res, 1);
134 res = vsetq_lane_f32(
std::real(from[1*stride]), res, 2);
135 res = vsetq_lane_f32(
std::imag(from[1*stride]), res, 3);
139 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>,
Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
Index stride)
141 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
142 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
150 vst1q_f32((
float *)x, a.v);
156 float32x2_t a_lo, a_hi;
159 a_lo = vget_low_f32(a.
v);
160 a_hi = vget_high_f32(a.
v);
161 a_r128 = vcombine_f32(a_hi, a_lo);
174 std::complex<float>
s;
176 a1 = vget_low_f32(a.
v);
177 a2 = vget_high_f32(a.
v);
178 a2 = vadd_f32(a1, a2);
179 vst1_f32((
float *)&s, a2);
189 sum1 = vcombine_f32(vget_low_f32(vecs[0].
v), vget_low_f32(vecs[1].v));
190 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
191 sum = vaddq_f32(sum1, sum2);
198 float32x2_t a1, a2, v1, v2, prod;
199 std::complex<float>
s;
201 a1 = vget_low_f32(a.
v);
202 a2 = vget_high_f32(a.
v);
204 v1 = vdup_lane_f32(a1, 0);
206 v2 = vdup_lane_f32(a1, 1);
208 v1 = vmul_f32(v1, a2);
210 v2 = vmul_f32(v2, a2);
212 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2),
p2ui_CONJ_XOR()));
216 prod = vadd_f32(v1, v2);
218 vst1_f32((
float *)&s, prod);
230 first.
v = vextq_f32(first.
v, second.
v, 2);
235 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
246 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
257 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
275 s = vmulq_f32(
b.v,
b.v);
276 rev_s = vrev64q_f32(s);
281 EIGEN_DEVICE_FUNC
inline void 283 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.
packet[0].v), vget_high_f32(kernel.
packet[1].v));
284 kernel.
packet[0].v = vcombine_f32(vget_low_f32(kernel.
packet[0].v), vget_low_f32(kernel.
packet[1].v));
289 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG 293 static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
295 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
296 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
340 template<>
EIGEN_STRONG_INLINE Packet1cd
pconj(
const Packet1cd& a) {
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.
v), p2ul_CONJ_XOR))); }
347 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
349 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
351 v1 = vmulq_f64(v1,
b.v);
353 v2 = vmulq_f64(v2,
b.v);
355 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
357 v2 = preverse<Packet2d>(v2);
359 return Packet1cd(vaddq_f64(v1, v2));
364 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(
b.v))));
368 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(
b.v))));
372 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(
b.v))));
376 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(
b.v))));
386 template<> EIGEN_DEVICE_FUNC
inline Packet1cd pgather<std::complex<double>, Packet1cd>(
const std::complex<double>* from,
Index stride)
389 res = vsetq_lane_f64(
std::real(from[0*stride]), res, 0);
390 res = vsetq_lane_f64(
std::imag(from[0*stride]), res, 1);
391 return Packet1cd(res);
394 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,
const Packet1cd& from,
Index stride)
396 to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
403 pstore<std::complex<double> >(&res, a);
426 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
437 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
448 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
464 Packet2d rev_s = preverse<Packet2d>(s);
476 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.
packet[0].v), vget_high_f64(kernel.
packet[1].v));
477 kernel.
packet[0].v = vcombine_f64(vget_low_f64(kernel.
packet[0].v), vget_low_f64(kernel.
packet[1].v));
480 #endif // EIGEN_ARCH_ARM64 486 #endif // EIGEN_COMPLEX_NEON_H EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf &a, const Packet2cf &b) const
EIGEN_STRONG_INLINE Packet1cd ploaddup< Packet1cd >(const std::complex< double > *from)
#define EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC RealReturnType real() const
EIGEN_STRONG_INLINE Packet2cf por< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet2cf preduxp< Packet2cf >(const Packet2cf *vecs)
EIGEN_STRONG_INLINE Packet1cd padd< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_STRONG_INLINE Packet1cd preduxp< Packet1cd >(const Packet1cd *vecs)
#define EIGEN_DEBUG_UNALIGNED_LOAD
EIGEN_STRONG_INLINE Packet2cf pmul< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet1cd pand< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_STRONG_INLINE Packet2cf ploadu< Packet2cf >(const std::complex< float > *from)
#define EIGEN_DEBUG_ALIGNED_STORE
__vector unsigned int Packet4ui
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf &x, const Packet2cf &y, const Packet2cf &c) const
static constexpr size_t size(Tuple< Args... > &)
Provides access to the number of elements in a tuple as a compile-time constant expression.
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf &x, const Packet2cf &y, const Packet2cf &c) const
EIGEN_STRONG_INLINE Packet1cd pdiv< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_STRONG_INLINE Packet2cf pload< Packet2cf >(const std::complex< float > *from)
EIGEN_STRONG_INLINE Packet2cf pset1< Packet2cf >(const std::complex< float > &from)
EIGEN_STRONG_INLINE Packet1cd pxor< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_STRONG_INLINE std::complex< float > pfirst< Packet2cf >(const Packet2cf &a)
#define EIGEN_DEBUG_UNALIGNED_STORE
EIGEN_STRONG_INLINE Packet1cd pset1< Packet1cd >(const std::complex< double > &from)
#define EIGEN_DEBUG_ALIGNED_LOAD
EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf &x)
EIGEN_STRONG_INLINE Packet2cf psub< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf &a, const Packet2cf &b) const
EIGEN_STRONG_INLINE Packet2cf(const Packet4f &a)
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf &x, const Packet2cf &y, const Packet2cf &c) const
EIGEN_STRONG_INLINE std::complex< double > predux< Packet1cd >(const Packet1cd &a)
EIGEN_DEVICE_FUNC void pstoreu(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
EIGEN_STRONG_INLINE Packet2cf pand< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet2cf()
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
EIGEN_STRONG_INLINE Packet1cd pandnot< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type pfirst(const Packet &a)
EIGEN_STRONG_INLINE Packet1cd pload< Packet1cd >(const std::complex< double > *from)
const mpreal sum(const mpreal tab[], const unsigned long int n, int &status, mp_rnd_t mode=mpreal::get_default_rnd())
EIGEN_STRONG_INLINE std::complex< float > predux_mul< Packet2cf >(const Packet2cf &a)
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf &a, const Packet2cf &b) const
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE std::complex< float > predux< Packet2cf >(const Packet2cf &a)
EIGEN_STRONG_INLINE Packet2cf ploaddup< Packet2cf >(const std::complex< float > *from)
EIGEN_STRONG_INLINE std::complex< double > predux_mul< Packet1cd >(const Packet1cd &a)
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
uint32x2_t p2ui_CONJ_XOR()
EIGEN_STRONG_INLINE Packet1cd psub< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
static EIGEN_STRONG_INLINE void run(Packet2cf &first, const Packet2cf &second)
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet1cd pmul< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
#define EIGEN_ARM_PREFETCH(ADDR)
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
static Packet4ui p4ui_CONJ_XOR
EIGEN_STRONG_INLINE Packet1cd ploadu< Packet1cd >(const std::complex< double > *from)
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
EIGEN_DEVICE_FUNC const ImagReturnType imag() const
std::complex< float > type
EIGEN_STRONG_INLINE Packet1cd por< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
EIGEN_STRONG_INLINE Packet2cf pxor< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
EIGEN_STRONG_INLINE Packet2cf padd< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
void run(Expr &expr, Dev &dev)
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
EIGEN_DEVICE_FUNC const Scalar & b
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet2cf pandnot< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE std::complex< double > pfirst< Packet1cd >(const Packet1cd &a)
EIGEN_STRONG_INLINE Packet2cf pdiv< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
EIGEN_STRONG_INLINE Packet2cf pcplxflip< Packet2cf >(const Packet2cf &x)
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)