35 #ifndef EIGEN_INVERSE_SIZE_4_H
36 #define EIGEN_INVERSE_SIZE_4_H
42 template <
typename MatrixType,
typename ResultType>
// Load three Packet4f values from `data` at element offsets stride*4,
// stride*8 and stride*12, using the MatrixAlignment load mode.
// NOTE(review): presumably these are the remaining three rows/columns of the
// 4x4 input, with the first load sitting on a line outside this view - confirm.
// NOTE(review): names of the form _L2 (underscore followed by an uppercase
// letter) are reserved for the implementation in C++; a rename would have to
// be done together with every use outside this view.
60 Packet4f _L2 = ploadt<Packet4f,MatrixAlignment>(
data + stride*4);
61 Packet4f _L3 = ploadt<Packet4f,MatrixAlignment>(
data + stride*8);
62 Packet4f _L4 = ploadt<Packet4f,MatrixAlignment>(
data + stride*12);
69 if (!StorageOrdersMatch)
// Subtract from AB the packet product of a shuffled copy of A (indices
// 1,1,2,2) and a shuffled copy of B (indices 2,3,0,1).
// NOTE(review): presumably this completes the 2x2 block product adj(A)*B whose
// first half is computed on a line outside this view - confirm.
88 AB =
psub(AB,
pmul(
vec4f_swizzle2(
A,
A, 1, 1, 2, 2),
vec4f_swizzle2(
B,
B, 2, 3, 0, 1)));
// Same update with the same shuffle indices, applied to DC using D and C.
// NOTE(review): presumably adj(D)*C - confirm.
92 DC =
psub(DC,
pmul(
vec4f_swizzle2(
D,
D, 1, 1, 2, 2),
vec4f_swizzle2(
C,
C, 2, 3, 0, 1)));
// Subtract from iB the packet product of D shuffled with indices 1,0,3,2 and
// AB shuffled with indices 2,1,2,1.
// NOTE(review): presumably the second half of a 2x2 block product of D with
// the AB block; the first half is outside this view - confirm.
138 iB =
psub(iB,
pmul(
vec4f_swizzle2(
D,
D, 1, 0, 3, 2),
vec4f_swizzle2(AB, AB, 2, 1, 2, 1)));
// Mirror of the statement above: iC is reduced by the product of shuffled A
// and shuffled DC, using the same index patterns.
143 iC =
psub(iC,
pmul(
vec4f_swizzle2(
A,
A, 1, 0, 3, 2),
vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));
// Four-lane mask: lanes 1 and 2 hold the bit pattern 0x80000000 reinterpreted
// as float (only the sign bit set, assuming IEEE-754 binary32), lanes 0 and 3
// hold 0.0f.
146 const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};
// XOR rd with the sign packet.
// NOTE(review): p4f_sign_PNNP is defined outside this view; presumably it is
// loaded from sign_mask, in which case this negates lanes 1 and 2 of rd and
// leaves lanes 0 and 3 unchanged (+,-,-,+) - confirm.
148 rd =
pxor(rd, p4f_sign_PNNP);
// Write the float result as four Packet4f stores at res + k*res_stride for
// k = 0..3, using the ResultAlignment store mode.
// Stores 0 and 1 are shuffled from iA and iB; stores 2 and 3 from iC and iD.
// The two stores of each pair use the index patterns 3,1,3,1 and 2,0,2,0.
157 pstoret<float, Packet4f, ResultAlignment>(
res + 0,
vec4f_swizzle2(iA, iB, 3, 1, 3, 1));
158 pstoret<float, Packet4f, ResultAlignment>(
res + res_stride,
vec4f_swizzle2(iA, iB, 2, 0, 2, 0));
159 pstoret<float, Packet4f, ResultAlignment>(
res + 2 * res_stride,
vec4f_swizzle2(iC, iD, 3, 1, 3, 1));
160 pstoret<float, Packet4f, ResultAlignment>(
res + 3 * res_stride,
vec4f_swizzle2(iC, iD, 2, 0, 2, 0));
// The guarded section is compiled when EIGEN_VECTORIZE_NEON is not defined,
// or when EIGEN_ARCH_ARM64 is non-zero and EIGEN_APPLE_DOUBLE_NEON_BUG is zero.
// NOTE(review): presumably this excludes the double-precision code below on
// NEON targets lacking usable Packet2d support - confirm.
164 #if !(defined EIGEN_VECTORIZE_NEON && !(EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG))
167 template <
typename MatrixType,
typename ResultType>
178 typename MatrixType::PlainObject>::
type
// Path taken when StorageOrdersMatch is true: read eight Packet2d values from
// `data` at offsets stride*0, 2, 4, ..., 14, assigning them in the order
// A1, B1, A2, B2, C1, D1, C2, D2.
// NOTE(review): presumably each packet is half a row of a 2x2 sub-block of the
// 4x4 matrix (A, B on top, C, D below) - confirm.
196 if (StorageOrdersMatch)
198 A1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*0);
199 B1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*2);
200 A2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*4);
201 B2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*6);
202 C1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*8);
203 D1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*10);
204 C2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*12);
205 D2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*14);
// Alternate load sequence (the keyword/braces selecting it are outside this
// view). The same eight offsets are read, but offsets 2 and 6 go to C1/C2 and
// offsets 8 and 12 go to B1/B2, so the B and C roles are swapped relative to
// the StorageOrdersMatch path.
// NOTE(review): presumably combined with lane interleaving on lines not shown
// here to transpose the input - confirm.
210 A1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*0);
211 C1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*2);
212 A2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*4);
213 C2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*6);
222 B1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*8);
223 D1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*10);
224 B2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*12);
225 D2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*14);
// Eight Packet2d temporaries; the iC*/iD* ones are later shuffled, scaled and
// stored into `res`.
// NOTE(review): presumably each pair (e.g. iA1, iA2) holds the two rows of one
// 2x2 block of the unscaled inverse - confirm.
291 Packet2d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2;
// Two-lane masks built from the bit pattern 0x8000000000000000 reinterpreted
// as double (only the sign bit set, assuming IEEE-754 binary64):
// sign_mask1 has it in lane 1, sign_mask2 has it in lane 0; the other lane is 0.0.
329 const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};
330 const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};
// d1 is rd XOR-ed with sign_PN.
// NOTE(review): sign_PN is defined outside this view; presumably it is loaded
// from sign_mask1, making d1 equal to rd with lane 1 negated - confirm.
333 d1 =
pxor(rd, sign_PN);
// Store two Packet2d values into the result at res + 2*res_stride and
// res + 2*res_stride + 2, using the ResultAlignment store mode.
// Each value is a shuffle of an (i*2, i*1) pair with selector 3, multiplied by d1.
// NOTE(review): presumably these are the left (from iC) and right (from iD)
// halves of the third row/column of the inverse - confirm.
342 pstoret<double, Packet2d, ResultAlignment>(
res + 2 * res_stride,
pmul(
vec2d_swizzle2(iC2, iC1, 3), d1));
344 pstoret<double, Packet2d, ResultAlignment>(
res + 2 * res_stride + 2,
pmul(
vec2d_swizzle2(iD2, iD1, 3), d1));