11 #ifndef EIGEN_COMPLEX_NEON_H 
   12 #define EIGEN_COMPLEX_NEON_H 
   21 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML 
   22   uint32x4_t 
ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
 
   25   static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
 
   26   return vld1q_u32( conj_XOR_DATA );
 
   32   static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
 
   33   return vld1_u32( conj_XOR_DATA );
 
   51 template<> 
struct packet_traits<
std::
complex<float> > : default_packet_traits
 
   77   typedef std::complex<float> 
type;
 
   89 template<> 
struct unpacket_traits<Packet2cf>
 
   91   typedef std::complex<float> 
type;
 
  105 { 
return Packet1cf(vset_lane_f32(
a, vdup_n_f32(0.
f), 0)); }
 
  107 { 
return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(
a)))); }
 
  110 { 
return Packet1cf(vld1_f32(
reinterpret_cast<const float*
>(&from))); }
 
  113   const float32x2_t r64 = vld1_f32(
reinterpret_cast<const float*
>(&from));
 
  114   return Packet2cf(vcombine_f32(r64, r64));
 
  138   return Packet2cf(vreinterpretq_f32_u32(veorq_u32(
b, 
p4ui_CONJ_XOR())));
 
  146   v1 = vdup_lane_f32(
a.v, 0);
 
  148   v2 = vdup_lane_f32(
a.v, 1);
 
  150   v1 = vmul_f32(
v1, 
b.v);
 
  152   v2 = vmul_f32(
v2, 
b.v);
 
  165   v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(
a.v), 0), vdup_lane_f32(vget_high_f32(
a.v), 0));
 
  167   v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(
a.v), 1), vdup_lane_f32(vget_high_f32(
a.v), 1));
 
  169   v1 = vmulq_f32(
v1, 
b.v);
 
  171   v2 = vmulq_f32(
v2, 
b.v);
 
  173   v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(
v2), 
p4ui_CONJ_XOR()));
 
  175   v2 = vrev64q_f32(
v2);
 
  177   return Packet2cf(vaddq_f32(
v1, 
v2));
 
  187   Packet2f eq_swapped = vrev64_f32(eq);
 
  198   Packet4f eq_swapped = vrev64q_f32(eq);
 
  204 { 
return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(
a.v), vreinterpret_u32_f32(
b.v)))); }
 
  206 { 
return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(
a.v), vreinterpretq_u32_f32(
b.v)))); }
 
  209 { 
return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(
a.v), vreinterpret_u32_f32(
b.v)))); }
 
  211 { 
return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(
a.v), vreinterpretq_u32_f32(
b.v)))); }
 
  214 { 
return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(
a.v), vreinterpret_u32_f32(
b.v)))); }
 
  216 { 
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(
a.v), vreinterpretq_u32_f32(
b.v)))); }
 
  219 { 
return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(
a.v), vreinterpret_u32_f32(
b.v)))); }
 
  221 { 
return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(
a.v), vreinterpretq_u32_f32(
b.v)))); }
 
  240 template<> 
EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, 
const Packet2cf& from)
 
  245 template<> 
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, 
const Packet2cf& from)
 
  249     const std::complex<float>* from, 
Index stride)
 
  254 template<> 
EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
 
  255     const std::complex<float>* from, 
Index stride)
 
  261   return Packet2cf(
res);
 
  266 { to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }
 
  267 template<> 
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(
 
  268     std::complex<float>* to, 
const Packet2cf& from, 
Index stride)
 
  270   to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
 
  271   to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
 
  274 template<> 
EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(
const std::complex<float> *addr)
 
  280   vst1_f32(
reinterpret_cast<float*
>(&
x), 
a.v);
 
  286   vst1q_f32(
reinterpret_cast<float*
>(
x), 
a.v);
 
  292 { 
return Packet2cf(vcombine_f32(vget_high_f32(
a.v), vget_low_f32(
a.v))); }
 
  297 { 
return Packet2cf(vrev64q_f32(
a.v)); }
 
  301   std::complex<float> 
s;
 
  302   vst1_f32((
float *)&
s, 
a.v);
 
  307   std::complex<float> 
s;
 
  308   vst1_f32(
reinterpret_cast<float*
>(&
s), vadd_f32(vget_low_f32(
a.v), vget_high_f32(
a.v)));
 
  314   std::complex<float> 
s;
 
  315   vst1_f32((
float *)&
s, 
a.v);
 
  321   std::complex<float> 
s;
 
  323   a1 = vget_low_f32(
a.v);
 
  324   a2 = vget_high_f32(
a.v);
 
  326   v1 = vdup_lane_f32(
a1, 0);
 
  328   v2 = vdup_lane_f32(
a1, 1);
 
  340   vst1_f32(
reinterpret_cast<float*
>(&
s), 
prod);
 
  355   s = vmul_f32(
b.v, 
b.v);
 
  356   rev_s = vrev64_f32(
s);
 
  367   s = vmulq_f32(
b.v, 
b.v);
 
  368   rev_s = vrev64q_f32(
s);
 
  376   Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
 
  377   kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
 
  378   kernel.packet[1].v = tmp;
 
  382   return psqrt_complex<Packet1cf>(
a);
 
  386   return psqrt_complex<Packet2cf>(
a);
 
  390 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG 
  393 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML 
  394   static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
 
  396   const uint64_t  p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
 
  397   static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
 
  407 template<> 
struct packet_traits<
std::
complex<double> >  : default_packet_traits
 
  409   typedef Packet1cd 
type;
 
  410   typedef Packet1cd 
half;
 
  431 template<> 
struct unpacket_traits<Packet1cd>
 
  433   typedef std::complex<double> 
type;
 
  434   typedef Packet1cd 
half;
 
  465 { 
return Packet1cd(pnegate<Packet2d>(
a.v)); }
 
  468 { 
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(
a.v), p2ul_CONJ_XOR))); }
 
  475   v1 = vdupq_lane_f64(vget_low_f64(
a.v), 0);
 
  477   v2 = vdupq_lane_f64(vget_high_f64(
a.v), 0);
 
  479   v1 = vmulq_f64(
v1, 
b.v);
 
  481   v2 = vmulq_f64(
v2, 
b.v);
 
  483   v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(
v2), p2ul_CONJ_XOR));
 
  485   v2 = preverse<Packet2d>(
v2);
 
  487   return Packet1cd(vaddq_f64(
v1, 
v2));
 
  497   Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
 
  503 { 
return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(
a.v),vreinterpretq_u64_f64(
b.v)))); }
 
  506 { 
return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(
a.v),vreinterpretq_u64_f64(
b.v)))); }
 
  509 { 
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(
a.v),vreinterpretq_u64_f64(
b.v)))); }
 
  512 { 
return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(
a.v),vreinterpretq_u64_f64(
b.v)))); }
 
  517 template<> 
EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, 
const Packet1cd& from)
 
  520 template<> 
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, 
const Packet1cd& from)
 
  523 template<> 
EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(
const std::complex<double> *addr)
 
  526 template<> 
EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
 
  527     const std::complex<double>* from, 
Index stride)
 
  532   return Packet1cd(
res);
 
  // pscatter specialization for std::complex<double> / Packet1cd.
  // Writes exactly one complex value: lane 0 of from.v is passed as the real
  // part and lane 1 as the imaginary part of the std::complex<double> stored
  // at `to`. The index is stride*0, so `stride` never changes where the value
  // lands; it is only present in the signature.
  535 template<> 
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
 
  536     std::complex<double>* to, 
const Packet1cd& from, 
Index stride)
 
  537 { to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }
 
  542   pstore<std::complex<double> >(&
res, 
a);
 
  569   Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
 
  570   kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
 
  571   kernel.packet[1].v = tmp;
 
  575   return psqrt_complex<Packet1cd>(
a);
 
  578 #endif // EIGEN_ARCH_ARM64 
  584 #endif // EIGEN_COMPLEX_NEON_H