10 #ifndef EIGEN_COMPLEX_SSE_H
11 #define EIGEN_COMPLEX_SSE_H
27 #ifndef EIGEN_VECTORIZE_AVX
// Mask with only the sign bit (bit 31) set in each of the four 32-bit lanes.
59 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
// XOR with the mask toggles the sign bit of every float lane of a.v and
// leaves all other bits untouched.
60 return Packet2cf(_mm_xor_ps(
a.v,mask));
// Sign-bit mask for lanes 1 and 3 only (_mm_setr_epi32 takes its arguments
// lowest lane first); lanes 0 and 2 get an all-zero mask.
// NOTE(review): lanes 1 and 3 look like the imaginary parts of the two packed
// complex values (pscatter in this file builds std::complex from lanes 0/1 and
// 2/3) - confirm against the Packet2cf definition.
64 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
// Toggle the sign of lanes 1 and 3 of a.v; lanes 0 and 2 pass through unchanged.
65 return Packet2cf(_mm_xor_ps(
a.v,mask));
// SSE3 path. _mm_moveldup_ps duplicates the even lanes of a.v (0,0,2,2) and
// _mm_movehdup_ps the odd lanes (1,1,3,3). _mm_addsub_ps subtracts its second
// operand in even lanes and adds it in odd lanes.
// NOTE(review): the second factor of the second _mm_mul_ps and the closing
// parentheses are not part of this excerpt.
70 #ifdef EIGEN_VECTORIZE_SSE3
71 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(
a.v),
b.v),
72 _mm_mul_ps(_mm_movehdup_ps(
a.v),
// Sign-bit mask for lanes 0 and 2 only. Where it is applied is outside the
// visible lines; presumably this is the non-SSE3 branch - confirm.
78 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
// First product term: a.v rearranged by vec4f_swizzle1 with indices (0,0,2,2)
// multiplied lane-wise by b.v. The second operand of _mm_add_ps is not visible.
// NOTE(review): vec4f_swizzle1 is defined elsewhere; presumably it selects the
// listed source lanes - confirm.
79 return Packet2cf(_mm_add_ps(_mm_mul_ps(
vec4f_swizzle1(
a.v, 0, 0, 2, 2),
b.v),
// Three compiler-dependent ways of loading the 64 bits at &from into the low
// half of res.v. _mm_loadl_pi copies the upper half from its first argument.
// NOTE(review): the declaration of res and the #else/#endif lines are not part
// of this excerpt.
96 #if EIGEN_GNUC_AT_MOST(4,2)
// GCC <= 4.2: use a zeroed vector as the source of the (discarded) upper half.
98 res.v = _mm_loadl_pi(_mm_set1_ps(0.0f),
reinterpret_cast<const __m64*
>(&from));
99 #elif EIGEN_GNUC_AT_LEAST(4,6)
// GCC >= 4.6: res.v itself supplies the upper half, so -Wuninitialized is
// silenced for this one statement and restored by the matching pop.
101 #pragma GCC diagnostic push
102 #pragma GCC diagnostic ignored "-Wuninitialized"
103 res.v = _mm_loadl_pi(res.v, (
const __m64*)&from);
104 #pragma GCC diagnostic pop
// Remaining compilers: same load without the pragma guard.
106 res.v = _mm_loadl_pi(res.v, (
const __m64*)&from);
// _mm_movelh_ps(x, x) copies the low 64 bits of x into its high 64 bits, so
// both halves of the result hold the value just loaded from `from`.
108 return Packet2cf(_mm_movelh_ps(res.v,res.v));
// Explicit specialization of pgather for std::complex<float> scalars and
// Packet2cf packets; takes a source pointer and an element stride.
// NOTE(review): the body is not part of this excerpt; presumably it reads
// from[0] and from[stride], mirroring pscatter - confirm.
117 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from,
Index stride)
// Explicit specialization of pscatter for std::complex<float> / Packet2cf:
// writes the two complex values held in `from` to to[0] and to[stride].
// (The enclosing braces of the body are not part of this excerpt.)
123 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
Index stride)
// _mm_shuffle_ps(v, v, k) with k in 0..3 places lane k of v in lane 0 of the
// result; _mm_cvtss_f32 then extracts lane 0 as a scalar float.
// First complex value: lane 0 is the real part, lane 1 the imaginary part.
125 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
126 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
// Second complex value: lane 2 is the real part, lane 3 the imaginary part.
127 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
128 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
// Two ways of copying the leading complex value of a.v to memory.
// NOTE(review): the declaration of `res` for the first branch, the return
// statements and the #else/#endif lines are not part of this excerpt.
135 #if EIGEN_GNUC_AT_MOST(4,3)
// GCC <= 4.3: store all four floats of a.v to `res` with an aligned store
// (_mm_store_ps requires a 16-byte aligned destination).
139 _mm_store_ps((
float*)res,
a.v);
// Other compilers: write only the low 64 bits of a.v (lanes 0 and 1) directly
// over the storage of a std::complex<float>.
142 std::complex<float> res;
143 _mm_storel_pi((__m64*)&res,
a.v);
// _mm_movehl_ps(a.v, a.v) brings the high 64 bits of a.v (lanes 2,3) down to
// lanes 0,1. Adding that to a.v leaves lane0+lane2 and lane1+lane3 in the low
// half, i.e. the component-wise sum of the two packed complex values.
// The result is handed to pfirst.
// NOTE(review): pfirst presumably extracts the first complex value - confirm.
152 return pfirst(Packet2cf(_mm_add_ps(
a.v, _mm_movehl_ps(
a.v,
a.v))));
// _mm_movelh_ps(v0, v1) = [v0 low half, v1 low half];
// _mm_movehl_ps(v1, v0) = [v0 high half, v1 high half].
// Their sum therefore holds, in its low half, the sum of the two complex
// values of vecs[0] and, in its high half, the sum of those of vecs[1].
157 return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
// Partial specialization of palign_impl for Packet2cf, parameterised on Offset.
// NOTE(review): the template header, the enclosing function and any test on
// Offset are not part of this excerpt.
166 struct palign_impl<Offset,Packet2cf>
// Step 1: copy the high 64 bits of `first` into its low 64 bits.
172 first.v = _mm_movehl_ps(first.v, first.v);
// Step 2: keep that low half and take the low 64 bits of `second` as the high
// half, so first = [old first high half, second low half].
173 first.v = _mm_movelh_ps(first.v, second.v);
// Three full specializations of conj_helper for Packet2cf operands, differing
// only in the two boolean template flags. Only the header, the SSE3 guard and
// the fallback mask of each are visible here; the member functions are elided.
// NOTE(review): presumably the flags select conjugation of the first/second
// operand - confirm against the primary template.

// Flags <false,true>.
178 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
185 #ifdef EIGEN_VECTORIZE_SSE3
// Sign-bit mask for lanes 1 and 3 (arguments are listed lowest lane first).
188 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
// Flags <true,false>.
196 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
203 #ifdef EIGEN_VECTORIZE_SSE3
// Same lane-1/lane-3 sign mask as above.
206 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
// Flags <true,true>.
214 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
221 #ifdef EIGEN_VECTORIZE_SSE3
// Same lane-1/lane-3 sign mask as above.
224 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
// Numerator: product of a and b computed through the <false,true> conj_helper.
// NOTE(review): presumably this is a * conj(b) - confirm against conj_helper.
237 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(
a,
b);
// Lane-wise squares of b: [re0^2, im0^2, re1^2, im1^2].
238 __m128
s = _mm_mul_ps(
b.v,
b.v);
// Shuffle control 0xb1 swaps the lanes within each pair (0<->1, 2<->3), so
// s + shuffled(s) holds re^2 + im^2 of the corresponding complex value in
// every lane. The numerator is divided lane-wise by that sum.
239 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(
s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(
s), 0xb1)))));
258 #ifndef EIGEN_VECTORIZE_AVX
// _mm_set_epi32 lists its arguments highest 32-bit element first, so this sets
// bit 127 only: the sign bit of the high double (lane 1).
// NOTE(review): the statement that uses this mask is not part of this excerpt.
290 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
// SSE3 path: _mm_movedup_pd duplicates the low double of a.v into both lanes
// before the lane-wise multiply by b.v. _mm_addsub_pd subtracts its second
// operand in lane 0 and adds it in lane 1; that second operand is not visible.
296 #ifdef EIGEN_VECTORIZE_SSE3
297 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(
a.v),
b.v),
// Sets bit 63 only: the sign bit of the low double (lane 0). Its use is outside
// the visible lines; presumably this is the non-SSE3 branch - confirm.
301 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
// Spill both doubles of a.v to `res` with an aligned store (_mm_store_pd
// requires a 16-byte aligned destination).
// NOTE(review): the declaration of `res` is not part of this excerpt.
332 _mm_store_pd(res,
a.v);
// Lane 0 becomes the real part, lane 1 the imaginary part.
333 return std::complex<double>(res[0],res[1]);
// Three fragments with the same shape: an SSE3 guard followed by a mask that
// sets only bit 127, the sign bit of the high double (lane 1).
// NOTE(review): the enclosing definitions and the code using each mask are not
// part of this excerpt; presumably these belong to the double-precision
// counterparts of the Packet2cf conj_helper specializations - confirm.
370 #ifdef EIGEN_VECTORIZE_SSE3
373 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
388 #ifdef EIGEN_VECTORIZE_SSE3
391 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
406 #ifdef EIGEN_VECTORIZE_SSE3
409 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
// Lane-wise squares of b: [re^2, im^2].
423 __m128d
s = _mm_mul_pd(
b.v,
b.v);
// _mm_shuffle_pd(s, s, 0x1) swaps the two lanes of s, so s + swapped(s) holds
// re^2 + im^2 in both lanes; res.v is divided lane-wise by that sum.
// NOTE(review): `res` is defined on a line that is not part of this excerpt.
424 return Packet1cd(_mm_div_pd(res.
v, _mm_add_pd(
s,_mm_shuffle_pd(
s,
s, 0x1))));
// In-place transpose of a 2x2 block of complex<float> values held in two
// Packet2cf rows. Each complex value occupies 64 bits, so the rows are
// reinterpreted as pairs of doubles and transposed with the double-precision
// unpack intrinsics. (The closing brace is not part of this excerpt.)
432 EIGEN_DEVICE_FUNC
inline void
433 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
// Bit-for-bit reinterpretation of the two rows; no values are converted.
434 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
435 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
// New row 1 = [high 64 bits of row 0, high 64 bits of row 1].
437 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
// New row 0 = [low 64 bits of row 0, low 64 bits of row 1].
438 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
439 kernel.packet[1].v = tmp;
// Delegate to the Packet2d overload of pblend: both inputs are reinterpreted as
// pairs of doubles, so each 64-bit complex<float> is handled as one unit.
// NOTE(review): presumably ifPacket selects per 64-bit element between
// thenPacket and elsePacket - confirm against pblend<Packet2d>.
443 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
// _mm_loadl_pi keeps the high 64 bits of a.v and replaces the low 64 bits with
// the 8 bytes read from &b, i.e. b overwrites the first complex slot of a.
// NOTE(review): the enclosing signatures are not part of this excerpt; b is
// presumably a std::complex<float> - confirm.
449 return Packet2cf(_mm_loadl_pi(
a.v,
reinterpret_cast<const __m64*
>(&
b)));
// _mm_loadh_pi is the mirror image: the low 64 bits of a.v are kept and the
// high 64 bits are replaced with the 8 bytes read from &b.
459 return Packet2cf(_mm_loadh_pi(
a.v,
reinterpret_cast<const __m64*
>(&
b)));
471 #endif // EIGEN_COMPLEX_SSE_H