#include <xmmintrin.h>
#include <emmintrin.h>
#include <cstdint>  // uint32_t, used by the encoding constructors below
#include <cmath>    // sin, cos, tan, asin, acos, atan scalar fallbacks

enum { NUM_ELEMENTS = 4 };
// SIMD::Vector members.
Vector(__m128i const vec);
Vector(float number);
Vector(float n0, float n1, float n2, float n3);
Vector(uint32_t encoding);
Vector(uint32_t e0, uint32_t e1, uint32_t e2, uint32_t e3);

Vector& operator=(__m128 const vec);
Vector& operator=(__m128i const vec);

operator __m128 ();
operator __m128 () const;
operator __m128i ();
operator __m128i () const;
#if defined(GTE_USE_ROW_MAJOR)
STORAGE_ROW_MAJOR = 1,
#else
STORAGE_ROW_MAJOR = 0,
#endif
// SIMD::Matrix members.
Matrix(__m128 const* mat);
Matrix(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33);

Matrix& operator=(__m128 const* mat);

operator __m128* ();
operator __m128 const* () const;

__m128 const& operator[](int i) const;
__m128& operator[](int i);
// Logical operations.
inline static __m128 Not(__m128 const v);
inline static __m128 And(__m128 const v0, __m128 const v1);
inline static __m128 AndNot(__m128 const v0, __m128 const v1);
inline static __m128 Or(__m128 const v0, __m128 const v1);
inline static __m128 Xor(__m128 const v0, __m128 const v1);
inline static __m128 Select(__m128 const c, __m128 const v0, __m128 const v1);
// Comparisons.
inline static __m128 Equal(__m128 const v0, __m128 const v1);
inline static __m128 NotEqual(__m128 const v0, __m128 const v1);
inline static __m128 Less(__m128 const v0, __m128 const v1);
inline static __m128 LessEqual(__m128 const v0, __m128 const v1);
inline static __m128 Greater(__m128 const v0, __m128 const v1);
inline static __m128 GreaterEqual(__m128 const v0, __m128 const v1);
// Vector arithmetic operations.
inline static __m128 Negate(__m128 const v);
inline static __m128 Add(__m128 const v0, __m128 const v1);
inline static __m128 Subtract(__m128 const v0, __m128 const v1);
inline static __m128 Multiply(__m128 const v0, __m128 const v1);
inline static __m128 Divide(__m128 const v0, __m128 const v1);
inline static __m128 Round(__m128 const v);
inline static __m128 MaximumAbsoluteComponent(__m128 const v);
// Vector algebra.
inline static __m128 Dot(__m128 const v0, __m128 const v1);
inline static __m128 Length(__m128 const v);
inline static __m128 LengthRobust(__m128 const v);
inline static __m128 Normalize(__m128 const v);
inline static __m128 NormalizeGetLength(__m128 const v, __m128& length);
inline static __m128 NormalizeRobust(__m128 const v);
inline static __m128 NormalizeRobustGetLength(__m128 const v, __m128& length);
inline static __m128 Cross(__m128 const v0, __m128 const v1);
// Matrix arithmetic operations.
inline static void Negate(__m128 const* M, __m128* result);
inline static void Add(__m128 const* A, __m128 const* B, __m128* result);
inline static void Subtract(__m128 const* A, __m128 const* B, __m128* result);
inline static void Multiply(__m128 const* M, __m128 const c, __m128* result);
inline static void Divide(__m128 const* M, __m128 const c, __m128* result);
// Matrix geometric operations.
inline static void Transpose(__m128 const* mat, __m128* trn);
inline static void Inverse(__m128 const* mat, __m128* inv);
inline static void Adjoint(__m128 const* mat, __m128* adj);
inline static __m128 Determinant(__m128 const* mat);
inline static __m128 L1Norm(__m128 const* mat);
inline static __m128 L2Norm(__m128 const* mat);
inline static __m128 LInfinityNorm(__m128 const* mat);
// Matrix-matrix and matrix-vector products.
inline static void MultiplyAB(__m128 const* A, __m128 const* B, __m128* AB);
inline static void MultiplyATB(__m128 const* A, __m128 const* B, __m128* ATB);
inline static void MultiplyABT(__m128 const* A, __m128 const* B, __m128* ABT);
inline static void MultiplyATBT(__m128 const* A, __m128 const* B, __m128* ATBT);
inline static void MultiplyDM(__m128 const D, __m128 const* M, __m128* DM);
inline static void MultiplyMD(__m128 const* M, __m128 const D, __m128* MD);
inline static __m128 MultiplyMV(__m128 const* M, __m128 const V);
inline static __m128 MultiplyVM(__m128 const V, __m128 const* M);
// Quaternion support.
inline static __m128 QMultiply(__m128 const q0, __m128 const q1);
inline static __m128 QConjugate(__m128 const q);
inline static __m128 QInverse(__m128 const q);
inline static __m128 QSlerp(__m128 const t, __m128 const q0, __m128 const q1);
// Function evaluations (scalar fallbacks).
inline static __m128 Sin(__m128 const v);
inline static __m128 Cos(__m128 const v);
inline static __m128 Tan(__m128 const v);
inline static __m128 ASin(__m128 const v);
inline static __m128 ACos(__m128 const v);
inline static __m128 ATan(__m128 const v);
// Fast polynomial approximations to sine and cosine.
inline static void ReduceAnglesSin(__m128 const x, __m128& y);
inline static __m128 SinApprDeg11(__m128 const x);
inline static __m128 SinApprDeg7(__m128 const x);
inline static void ReduceAnglesCos(__m128 const x, __m128& y, __m128& sign);
inline static __m128 CosApprDeg10(__m128 const x, __m128 const sign);
inline static __m128 CosApprDeg6(__m128 const x, __m128 const sign);
// Support shared by Inverse, Adjoint, and Determinant.
inline static void GetAdjDet(__m128 const* mat, __m128* adj, __m128* det);
inline SIMD::Vector::Vector(__m128i const vec)
    :
    mTuple(_mm_castsi128_ps(vec))
{
}

inline SIMD::Vector::Vector(float number)
{
    mTuple = _mm_set1_ps(number);
}

inline SIMD::Vector::Vector(float n0, float n1, float n2, float n3)
{
    // _mm_set_ps takes its arguments in reverse lane order.
    mTuple = _mm_set_ps(n3, n2, n1, n0);
}

inline SIMD::Vector::Vector(uint32_t encoding)
{
    mTuple = _mm_castsi128_ps(_mm_set1_epi32(encoding));
}

inline SIMD::Vector::Vector(uint32_t e0, uint32_t e1, uint32_t e2, uint32_t e3)
{
    mTuple = _mm_castsi128_ps(_mm_set_epi32(e3, e2, e1, e0));
}

inline SIMD::Vector& SIMD::Vector::operator=(__m128 const vec)
{
    mTuple = vec;
    return *this;
}

inline SIMD::Vector& SIMD::Vector::operator=(__m128i const vec)
{
    mTuple = _mm_castsi128_ps(vec);
    return *this;
}

inline SIMD::Vector::operator __m128 ()
{
    return mTuple;
}

inline SIMD::Vector::operator __m128 () const
{
    return mTuple;
}

inline SIMD::Vector::operator __m128i ()
{
    return _mm_castps_si128(mTuple);
}

inline SIMD::Vector::operator __m128i () const
{
    return _mm_castps_si128(mTuple);
}
inline SIMD::Matrix::Matrix(Matrix const& mat)
{
    mTable[0] = mat.mTable[0];
    mTable[1] = mat.mTable[1];
    mTable[2] = mat.mTable[2];
    mTable[3] = mat.mTable[3];
}
inline SIMD::Matrix::Matrix(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33)
{
#if defined(GTE_USE_ROW_MAJOR)
    mTable[0] = _mm_setr_ps(m00, m01, m02, m03);
    mTable[1] = _mm_setr_ps(m10, m11, m12, m13);
    mTable[2] = _mm_setr_ps(m20, m21, m22, m23);
    mTable[3] = _mm_setr_ps(m30, m31, m32, m33);
#else
    mTable[0] = _mm_setr_ps(m00, m10, m20, m30);
    mTable[1] = _mm_setr_ps(m01, m11, m21, m31);
    mTable[2] = _mm_setr_ps(m02, m12, m22, m32);
    mTable[3] = _mm_setr_ps(m03, m13, m23, m33);
#endif
}
inline SIMD::Matrix& SIMD::Matrix::operator=(Matrix const& mat)
{
    mTable[0] = mat.mTable[0];
    mTable[1] = mat.mTable[1];
    mTable[2] = mat.mTable[2];
    mTable[3] = mat.mTable[3];
    return *this;
}

inline SIMD::Matrix::operator __m128* ()
{
    return mTable;
}

inline SIMD::Matrix::operator __m128 const* () const
{
    return mTable;
}

inline __m128 SIMD::Not(__m128 const v)
{
    // FFFF is the all-ones bit pattern, so the XOR complements every bit.
    return _mm_xor_ps(v, FFFF);
}
inline __m128 SIMD::And(__m128 const v0, __m128 const v1)
{
    return _mm_and_ps(v0, v1);
}

inline __m128 SIMD::AndNot(__m128 const v0, __m128 const v1)
{
    return _mm_andnot_ps(v0, v1);
}

inline __m128 SIMD::Or(__m128 const v0, __m128 const v1)
{
    return _mm_or_ps(v0, v1);
}

inline __m128 SIMD::Xor(__m128 const v0, __m128 const v1)
{
    return _mm_xor_ps(v0, v1);
}

inline __m128 SIMD::Select(__m128 const c, __m128 const v0, __m128 const v1)
{
    return _mm_or_ps(_mm_and_ps(c, v0), _mm_andnot_ps(c, v1));
}
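// Select implements a branchless per-lane choice: each lane of c must be
// all-ones or all-zeros (as produced by the comparison functions below), so
// (c & v0) | (~c & v1) picks the v0 lane where c is set and the v1 lane
// otherwise.  For example, Select(Greater(x, y), x, y) yields the per-lane
// maximum of x and y.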
inline __m128 SIMD::Equal(__m128 const v0, __m128 const v1)
{
    return _mm_cmpeq_ps(v0, v1);
}

inline __m128 SIMD::NotEqual(__m128 const v0, __m128 const v1)
{
    return _mm_cmpneq_ps(v0, v1);
}

inline __m128 SIMD::Less(__m128 const v0, __m128 const v1)
{
    return _mm_cmplt_ps(v0, v1);
}

inline __m128 SIMD::LessEqual(__m128 const v0, __m128 const v1)
{
    return _mm_cmple_ps(v0, v1);
}

inline __m128 SIMD::Greater(__m128 const v0, __m128 const v1)
{
    return _mm_cmpgt_ps(v0, v1);
}

inline __m128 SIMD::GreaterEqual(__m128 const v0, __m128 const v1)
{
    return _mm_cmpge_ps(v0, v1);
}
inline __m128 SIMD::Negate(__m128 const v)
{
    // Flip only the sign bit of each lane.
    return _mm_xor_ps(v, SIGN);
}

inline __m128 SIMD::Add(__m128 const v0, __m128 const v1)
{
    return _mm_add_ps(v0, v1);
}

inline __m128 SIMD::Subtract(__m128 const v0, __m128 const v1)
{
    return _mm_sub_ps(v0, v1);
}

inline __m128 SIMD::Multiply(__m128 const v0, __m128 const v1)
{
    return _mm_mul_ps(v0, v1);
}

inline __m128 SIMD::Divide(__m128 const v0, __m128 const v1)
{
    return _mm_div_ps(v0, v1);
}
inline __m128 SIMD::Round(__m128 const v)
{
    // t0 = mask of lanes whose magnitude is small enough to have a
    // fractional part (NSIGN clears the sign bit first).
    __m128 t0 = _mm_and_ps(NSIGN, v);
    t0 = _mm_castsi128_ps(_mm_cmplt_epi32(_mm_castps_si128(t0), NOFRC));
    // Round via the int conversion round trip, but only for those lanes.
    __m128i t1 = _mm_cvtps_epi32(v);
    __m128 t2 = _mm_cvtepi32_ps(t1);
    t2 = _mm_and_ps(t2, t0);
    t0 = _mm_andnot_ps(t0, v);
    t2 = _mm_or_ps(t2, t0);
    return t2;
}
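// The guard in Round handles floats too large to round: comparing the
// sign-cleared bits as integers against NOFRC (presumably the encoding of
// 2^23, the first float whose ulp is 1) flags lanes that still carry a
// fractional part.  Only those lanes take the cvt round trip; lanes at or
// beyond the threshold are already integers and pass through unchanged,
// which also sidesteps the out-of-range behavior of _mm_cvtps_epi32.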
inline __m128 SIMD::MaximumAbsoluteComponent(__m128 const v)
{
    __m128 vAbs = _mm_andnot_ps(SIGN, v);
    __m128 max0 = _mm_shuffle_ps(vAbs, vAbs, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 max1 = _mm_shuffle_ps(vAbs, vAbs, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 max2 = _mm_shuffle_ps(vAbs, vAbs, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 max3 = _mm_shuffle_ps(vAbs, vAbs, _MM_SHUFFLE(3, 3, 3, 3));
    max0 = _mm_max_ps(max0, max1);
    max2 = _mm_max_ps(max2, max3);
    max0 = _mm_max_ps(max0, max2);
    return max0;
}
inline __m128 SIMD::Dot(__m128 const v0, __m128 const v1)
{
    // (p0, p1, p2, p3) = (x0*x1, y0*y1, z0*z1, w0*w1)
    __m128 t0 = _mm_mul_ps(v0, v1);
    // (p1, p0, p3, p2)
    __m128 t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 3, 0, 1));
    // (p0+p1, p0+p1, p2+p3, p2+p3)
    __m128 t2 = _mm_add_ps(t0, t1);
    // (p2+p3, p2+p3, p0+p1, p0+p1)
    __m128 t3 = _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(0, 0, 2, 2));
    // (p0+p1+p2+p3) in every lane
    __m128 dotSplat = _mm_add_ps(t2, t3);
    return dotSplat;
}
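// Dot is a horizontal add done with two shuffle/add pairs: the first pass
// forms p0+p1 in lanes 0-1 and p2+p3 in lanes 2-3, the second adds the
// swapped halves.  Returning the dot product splatted across all four lanes
// (rather than as a scalar) lets callers feed it straight back into
// lanewise arithmetic, as Length and Normalize do below.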
inline __m128 SIMD::Length(__m128 const v)
{
    __m128 sqrLength = Dot(v, v);
    return _mm_sqrt_ps(sqrLength);
}
inline __m128 SIMD::LengthRobust(__m128 const v)
{
    // Scale by the maximum absolute component to avoid overflow or
    // underflow when squaring.
    __m128 maxComponent = MaximumAbsoluteComponent(v);
    __m128 normalized = _mm_div_ps(v, maxComponent);
    // Zero out the result when the maximum component is zero; the division
    // above is a potential divide by zero.
    __m128 mask = _mm_cmpneq_ps(ZZZZ, maxComponent);
    normalized = _mm_and_ps(mask, normalized);
    __m128 sqrLength = Dot(normalized, normalized);
    __m128 length = _mm_sqrt_ps(sqrLength);
    // Undo the initial scaling.
    return _mm_mul_ps(length, maxComponent);
}
inline __m128 SIMD::Normalize(__m128 const v)
{
    __m128 sqrLength = Dot(v, v);
    __m128 length = _mm_sqrt_ps(sqrLength);
    // This is a potential divide by zero.
    __m128 normalized = _mm_div_ps(v, length);
    // Zero out the result when the length is zero.
    __m128 mask = _mm_cmpneq_ps(ZZZZ, length);
    normalized = _mm_and_ps(mask, normalized);
    return normalized;
}
inline __m128 SIMD::NormalizeGetLength(__m128 const v, __m128& length)
{
    __m128 sqrLength = Dot(v, v);
    length = _mm_sqrt_ps(sqrLength);
    // This is a potential divide by zero.
    __m128 normalized = _mm_div_ps(v, length);
    // Zero out both outputs when the length is zero.
    __m128 mask = _mm_cmpneq_ps(ZZZZ, length);
    normalized = _mm_and_ps(mask, normalized);
    length = _mm_and_ps(mask, length);
    return normalized;
}
inline __m128 SIMD::NormalizeRobust(__m128 const v)
{
    // Scale by the maximum absolute component to avoid overflow or
    // underflow when squaring.
    __m128 maxComponent = MaximumAbsoluteComponent(v);
    __m128 normalized = _mm_div_ps(v, maxComponent);
    __m128 mask = _mm_cmpneq_ps(ZZZZ, maxComponent);
    normalized = _mm_and_ps(mask, normalized);
    __m128 sqrLength = Dot(normalized, normalized);
    __m128 length = _mm_sqrt_ps(sqrLength);
    normalized = _mm_div_ps(normalized, length);
    normalized = _mm_and_ps(mask, normalized);
    return normalized;
}
inline __m128 SIMD::NormalizeRobustGetLength(__m128 const v, __m128& length)
{
    // Scale by the maximum absolute component to avoid overflow or
    // underflow when squaring.
    __m128 maxComponent = MaximumAbsoluteComponent(v);
    __m128 normalized = _mm_div_ps(v, maxComponent);
    __m128 mask = _mm_cmpneq_ps(ZZZZ, maxComponent);
    normalized = _mm_and_ps(mask, normalized);
    __m128 sqrLength = Dot(normalized, normalized);
    length = _mm_sqrt_ps(sqrLength);
    normalized = _mm_div_ps(normalized, length);
    // Undo the initial scaling to recover the true length.
    length = _mm_mul_ps(length, maxComponent);
    normalized = _mm_and_ps(mask, normalized);
    length = _mm_and_ps(mask, length);
    return normalized;
}
inline __m128 SIMD::Cross(__m128 const v0, __m128 const v1)
{
    // (y0, z0, x0, w0)
    __m128 t0 = _mm_shuffle_ps(v0, v0, _MM_SHUFFLE(3, 0, 2, 1));
    // (z1, x1, y1, w1)
    __m128 t1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
    // (y0*z1, z0*x1, x0*y1, w0*w1)
    __m128 cross = _mm_mul_ps(t0, t1);
    // (z0, x0, y0, w0)
    t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 0, 2, 1));
    // (y1, z1, x1, w1)
    t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 1, 0, 2));
    // (z0*y1, x0*z1, y0*x1, w0*w1)
    t0 = _mm_mul_ps(t0, t1);
    cross = _mm_sub_ps(cross, t0);
    return cross;
}
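// The two shuffle patterns realize the cyclic permutations (y,z,x) and
// (z,x,y) of the operands, so the multiply/subtract pair produces
// (y0*z1 - z0*y1, z0*x1 - x0*z1, x0*y1 - y0*x1, w0*w1 - w0*w1); the w lane
// conveniently cancels to zero.  For example, Cross((1,2,3,0), (0,1,0,0))
// yields (-3, 0, 1, 0).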
inline void SIMD::Add(__m128 const* A, __m128 const* B, __m128* result)
{
    result[0] = _mm_add_ps(A[0], B[0]);
    result[1] = _mm_add_ps(A[1], B[1]);
    result[2] = _mm_add_ps(A[2], B[2]);
    result[3] = _mm_add_ps(A[3], B[3]);
}

inline void SIMD::Subtract(__m128 const* A, __m128 const* B, __m128* result)
{
    result[0] = _mm_sub_ps(A[0], B[0]);
    result[1] = _mm_sub_ps(A[1], B[1]);
    result[2] = _mm_sub_ps(A[2], B[2]);
    result[3] = _mm_sub_ps(A[3], B[3]);
}

inline void SIMD::Multiply(__m128 const* M, __m128 const c, __m128* result)
{
    result[0] = _mm_mul_ps(M[0], c);
    result[1] = _mm_mul_ps(M[1], c);
    result[2] = _mm_mul_ps(M[2], c);
    result[3] = _mm_mul_ps(M[3], c);
}

inline void SIMD::Divide(__m128 const* M, __m128 const c, __m128* result)
{
    result[0] = _mm_div_ps(M[0], c);
    result[1] = _mm_div_ps(M[1], c);
    result[2] = _mm_div_ps(M[2], c);
    result[3] = _mm_div_ps(M[3], c);
}
inline void SIMD::Transpose(__m128 const* mat, __m128* trn)
{
    // s0 = (m00, m01, m10, m11)
    __m128 s0 = _mm_shuffle_ps(mat[0], mat[1], _MM_SHUFFLE(1, 0, 1, 0));
    // s1 = (m20, m21, m30, m31)
    __m128 s1 = _mm_shuffle_ps(mat[2], mat[3], _MM_SHUFFLE(1, 0, 1, 0));
    // s2 = (m02, m03, m12, m13)
    __m128 s2 = _mm_shuffle_ps(mat[0], mat[1], _MM_SHUFFLE(3, 2, 3, 2));
    // s3 = (m22, m23, m32, m33)
    __m128 s3 = _mm_shuffle_ps(mat[2], mat[3], _MM_SHUFFLE(3, 2, 3, 2));
    trn[0] = _mm_shuffle_ps(s0, s1, _MM_SHUFFLE(2, 0, 2, 0));
    trn[1] = _mm_shuffle_ps(s0, s1, _MM_SHUFFLE(3, 1, 3, 1));
    trn[2] = _mm_shuffle_ps(s2, s3, _MM_SHUFFLE(2, 0, 2, 0));
    trn[3] = _mm_shuffle_ps(s2, s3, _MM_SHUFFLE(3, 1, 3, 1));
}
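// This is the classic eight-shuffle 4x4 transpose, comparable to what the
// _MM_TRANSPOSE4_PS macro from <xmmintrin.h> does in place: one pass that
// interleaves 2x2 blocks, then a pass that distributes them into the
// transposed rows.  Writing it out keeps mat and trn as separate arrays,
// so it also serves as an out-of-place transpose.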
inline void SIMD::Inverse(__m128 const* mat, __m128* inv)
{
    // Compute the adjoint of mat into inv and the determinant of mat.
    __m128 det;
    GetAdjDet(mat, inv, &det);

    // Compute 1/det, guarding against division by zero.  When det is zero,
    // the result is the zero matrix.
    __m128 invDet = _mm_div_ps(PPPP, det);
    __m128 neqZero = _mm_cmpneq_ps(det, ZZZZ);
    invDet = _mm_and_ps(neqZero, invDet);

    // inverse = adjoint/determinant
    inv[0] = _mm_mul_ps(inv[0], invDet);
    inv[1] = _mm_mul_ps(inv[1], invDet);
    inv[2] = _mm_mul_ps(inv[2], invDet);
    inv[3] = _mm_mul_ps(inv[3], invDet);
}
inline __m128 SIMD::L1Norm(__m128 const* mat)
{
    __m128 sum = _mm_andnot_ps(SIMD::SIGN, mat[0]);
    __m128 tmp = _mm_andnot_ps(SIMD::SIGN, mat[1]);
    sum = _mm_add_ps(sum, tmp);
    tmp = _mm_andnot_ps(SIMD::SIGN, mat[2]);
    sum = _mm_add_ps(sum, tmp);
    tmp = _mm_andnot_ps(SIMD::SIGN, mat[3]);
    sum = _mm_add_ps(sum, tmp);
    // Horizontal sum of the four partial sums (PPPP is all ones).
    return Dot(sum, PPPP);
}
inline __m128 SIMD::L2Norm(__m128 const* mat)
{
    __m128 sum = _mm_mul_ps(mat[0], mat[0]);
    __m128 tmp = _mm_mul_ps(mat[1], mat[1]);
    sum = _mm_add_ps(sum, tmp);
    tmp = _mm_mul_ps(mat[2], mat[2]);
    sum = _mm_add_ps(sum, tmp);
    tmp = _mm_mul_ps(mat[3], mat[3]);
    sum = _mm_add_ps(sum, tmp);
    // Frobenius norm: square root of the sum of squared entries.
    return _mm_sqrt_ps(Dot(sum, PPPP));
}
inline __m128 SIMD::LInfinityNorm(__m128 const* mat)
{
    __m128 max = _mm_andnot_ps(SIMD::SIGN, mat[0]);
    __m128 tmp = _mm_andnot_ps(SIMD::SIGN, mat[1]);
    max = _mm_max_ps(max, tmp);
    tmp = _mm_andnot_ps(SIMD::SIGN, mat[2]);
    max = _mm_max_ps(max, tmp);
    tmp = _mm_andnot_ps(SIMD::SIGN, mat[3]);
    max = _mm_max_ps(max, tmp);
    // Horizontal maximum of the lanewise maxima (all lanes nonnegative).
    return MaximumAbsoluteComponent(max);
}
inline void SIMD::MultiplyAB(__m128 const* A, __m128 const* B, __m128* AB)
{
    __m128 t0, t1, t2, t3;
#if defined(GTE_USE_ROW_MAJOR)
    // Row i of A*B is the linear combination of the rows of B weighted by
    // the lanes of row i of A.
    t0 = _mm_shuffle_ps(A[0], A[0], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(A[0], A[0], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(A[0], A[0], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(A[0], A[0], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, B[0]);
    t1 = _mm_mul_ps(t1, B[1]);
    t2 = _mm_mul_ps(t2, B[2]);
    t3 = _mm_mul_ps(t3, B[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[0] = _mm_add_ps(t0, t2);

    t0 = _mm_shuffle_ps(A[1], A[1], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(A[1], A[1], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(A[1], A[1], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(A[1], A[1], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, B[0]);
    t1 = _mm_mul_ps(t1, B[1]);
    t2 = _mm_mul_ps(t2, B[2]);
    t3 = _mm_mul_ps(t3, B[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[1] = _mm_add_ps(t0, t2);

    t0 = _mm_shuffle_ps(A[2], A[2], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(A[2], A[2], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(A[2], A[2], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(A[2], A[2], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, B[0]);
    t1 = _mm_mul_ps(t1, B[1]);
    t2 = _mm_mul_ps(t2, B[2]);
    t3 = _mm_mul_ps(t3, B[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[2] = _mm_add_ps(t0, t2);

    t0 = _mm_shuffle_ps(A[3], A[3], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(A[3], A[3], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(A[3], A[3], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(A[3], A[3], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, B[0]);
    t1 = _mm_mul_ps(t1, B[1]);
    t2 = _mm_mul_ps(t2, B[2]);
    t3 = _mm_mul_ps(t3, B[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[3] = _mm_add_ps(t0, t2);
#else
    // Column j of A*B is the linear combination of the columns of A
    // weighted by the lanes of column j of B.
    t0 = _mm_shuffle_ps(B[0], B[0], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(B[0], B[0], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(B[0], B[0], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(B[0], B[0], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, A[0]);
    t1 = _mm_mul_ps(t1, A[1]);
    t2 = _mm_mul_ps(t2, A[2]);
    t3 = _mm_mul_ps(t3, A[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[0] = _mm_add_ps(t0, t2);

    t0 = _mm_shuffle_ps(B[1], B[1], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(B[1], B[1], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(B[1], B[1], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(B[1], B[1], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, A[0]);
    t1 = _mm_mul_ps(t1, A[1]);
    t2 = _mm_mul_ps(t2, A[2]);
    t3 = _mm_mul_ps(t3, A[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[1] = _mm_add_ps(t0, t2);

    t0 = _mm_shuffle_ps(B[2], B[2], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(B[2], B[2], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(B[2], B[2], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(B[2], B[2], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, A[0]);
    t1 = _mm_mul_ps(t1, A[1]);
    t2 = _mm_mul_ps(t2, A[2]);
    t3 = _mm_mul_ps(t3, A[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[2] = _mm_add_ps(t0, t2);

    t0 = _mm_shuffle_ps(B[3], B[3], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(B[3], B[3], _MM_SHUFFLE(1, 1, 1, 1));
    t2 = _mm_shuffle_ps(B[3], B[3], _MM_SHUFFLE(2, 2, 2, 2));
    t3 = _mm_shuffle_ps(B[3], B[3], _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, A[0]);
    t1 = _mm_mul_ps(t1, A[1]);
    t2 = _mm_mul_ps(t2, A[2]);
    t3 = _mm_mul_ps(t3, A[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    AB[3] = _mm_add_ps(t0, t2);
#endif
}
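// Each row/column of the 4x4 product costs four splats, four multiplies,
// and three adds, with no horizontal arithmetic.  In scalar terms the
// row-major case reads AB[0] = A[0].x*B[0] + A[0].y*B[1] + A[0].z*B[2] +
// A[0].w*B[3], and similarly for the remaining rows.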
inline void SIMD::MultiplyDM(__m128 const D, __m128 const* M, __m128* DM)
{
#if defined(GTE_USE_ROW_MAJOR)
    DM[0] = _mm_mul_ps(D, M[0]);
    DM[1] = _mm_mul_ps(D, M[1]);
    DM[2] = _mm_mul_ps(D, M[2]);
    DM[3] = _mm_mul_ps(D, M[3]);
#else
    __m128 d0 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 d1 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 d2 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 d3 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(3, 3, 3, 3));
    DM[0] = _mm_mul_ps(d0, M[0]);
    DM[1] = _mm_mul_ps(d1, M[1]);
    DM[2] = _mm_mul_ps(d2, M[2]);
    DM[3] = _mm_mul_ps(d3, M[3]);
#endif
}
inline void SIMD::MultiplyMD(__m128 const* M, __m128 const D, __m128* MD)
{
#if defined(GTE_USE_ROW_MAJOR)
    __m128 d0 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 d1 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 d2 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 d3 = _mm_shuffle_ps(D, D, _MM_SHUFFLE(3, 3, 3, 3));
    MD[0] = _mm_mul_ps(M[0], d0);
    MD[1] = _mm_mul_ps(M[1], d1);
    MD[2] = _mm_mul_ps(M[2], d2);
    MD[3] = _mm_mul_ps(M[3], d3);
#else
    MD[0] = _mm_mul_ps(M[0], D);
    MD[1] = _mm_mul_ps(M[1], D);
    MD[2] = _mm_mul_ps(M[2], D);
    MD[3] = _mm_mul_ps(M[3], D);
#endif
}
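// D packs the four diagonal entries of a diagonal matrix into one register,
// so a full diagonal product reduces to four lanewise multiplies: either
// the packed diagonal against each stored row/column, or one splatted
// diagonal entry against the matching row/column, depending on the storage
// order selected by GTE_USE_ROW_MAJOR.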
inline __m128 SIMD::MultiplyMV(__m128 const* M, __m128 const V)
{
#if defined(GTE_USE_ROW_MAJOR)
    // The stored tuples are the rows of M; transpose and reuse the
    // column-weighted combination (one correct formulation for this
    // storage order).
    __m128 trn[4];
    Transpose(M, trn);
    return MultiplyVM(V, trn);
#else
    // The stored tuples are the columns of M, so M*V is the linear
    // combination of the columns weighted by the lanes of V.
    __m128 t0 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 t1 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 t2 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 t3 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(M[0], t0);
    t1 = _mm_mul_ps(M[1], t1);
    t2 = _mm_mul_ps(M[2], t2);
    t3 = _mm_mul_ps(M[3], t3);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    t0 = _mm_add_ps(t0, t2);
    return t0;
#endif
}
inline __m128 SIMD::MultiplyVM(__m128 const V, __m128 const* M)
{
#if defined(GTE_USE_ROW_MAJOR)
    // The stored tuples are the rows of M, so V*M is the linear combination
    // of the rows weighted by the lanes of V.
    __m128 t0 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 t1 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(1, 1, 1, 1));
    __m128 t2 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(2, 2, 2, 2));
    __m128 t3 = _mm_shuffle_ps(V, V, _MM_SHUFFLE(3, 3, 3, 3));
    t0 = _mm_mul_ps(t0, M[0]);
    t1 = _mm_mul_ps(t1, M[1]);
    t2 = _mm_mul_ps(t2, M[2]);
    t3 = _mm_mul_ps(t3, M[3]);
    t0 = _mm_add_ps(t0, t1);
    t2 = _mm_add_ps(t2, t3);
    t0 = _mm_add_ps(t0, t2);
    return t0;
#else
    // The stored tuples are the columns of M; transpose and reuse the
    // row-weighted combination (one correct formulation for this storage
    // order).
    __m128 trn[4];
    Transpose(M, trn);
    return MultiplyMV(trn, V);
#endif
}
inline __m128 SIMD::QMultiply(__m128 const q0, __m128 const q1)
{
    // Quaternions are stored as (x, y, z, w) with w the real part.  The
    // product is accumulated one q0-component at a time; the per-lane sign
    // vectors below are stated from the quaternion product expansion (an
    // assumption of this listing), e.g. the x0 term contributes
    // (+x0*w1, -x0*z1, +x0*y1, -x0*x1).
    __m128 product;
    {
        __m128 t0 = _mm_shuffle_ps(q0, q0, _MM_SHUFFLE(0, 0, 0, 0));
        __m128 t1 = _mm_shuffle_ps(q1, q1, _MM_SHUFFLE(0, 1, 2, 3));
        t1 = _mm_mul_ps(t1, PMPM);
        product = _mm_mul_ps(t0, t1);
    }
    {
        __m128 t0 = _mm_shuffle_ps(q0, q0, _MM_SHUFFLE(1, 1, 1, 1));
        __m128 t1 = _mm_shuffle_ps(q1, q1, _MM_SHUFFLE(1, 0, 3, 2));
        t1 = _mm_mul_ps(t1, PPMM);
        t1 = _mm_mul_ps(t0, t1);
        product = _mm_add_ps(product, t1);
    }
    {
        __m128 t0 = _mm_shuffle_ps(q0, q0, _MM_SHUFFLE(2, 2, 2, 2));
        __m128 t1 = _mm_shuffle_ps(q1, q1, _MM_SHUFFLE(2, 3, 0, 1));
        t1 = _mm_mul_ps(t1, MPPM);
        t1 = _mm_mul_ps(t0, t1);
        product = _mm_add_ps(product, t1);
    }
    {
        // The w0 term contributes (+w0*x1, +w0*y1, +w0*z1, +w0*w1).
        __m128 t0 = _mm_shuffle_ps(q0, q0, _MM_SHUFFLE(3, 3, 3, 3));
        __m128 t1 = _mm_mul_ps(t0, q1);
        product = _mm_add_ps(product, t1);
    }
    return product;
}
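// Lane convention for all quaternion functions: (x, y, z, w) with w the
// real part, so QConjugate below flips only the sign of lanes 0-2 via the
// MMMP sign constant.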
inline __m128 SIMD::QConjugate(__m128 const q)
{
    __m128 conjugate = _mm_mul_ps(SIMD::MMMP, q);
    return conjugate;
}
inline __m128 SIMD::QInverse(__m128 const q)
{
    __m128 conjugate = _mm_mul_ps(SIMD::MMMP, q);
    __m128 sqrlen = SIMD::Dot(conjugate, conjugate);
    // This is a potential divide by zero.
    __m128 inverse = _mm_div_ps(conjugate, sqrlen);
    // Zero out the result when the squared length is zero.
    __m128 mask = _mm_cmpneq_ps(SIMD::ZZZZ, sqrlen);
    inverse = _mm_and_ps(mask, inverse);
    return inverse;
}
inline __m128 SIMD::QSlerp(__m128 const t, __m128 const q0, __m128 const q1)
{
    float const onePlusMuFPU = 1.90110745351730037f;

    // sign = -1 when Dot(q0, q1) < 0, +1 otherwise; using sign*q1 keeps the
    // pair on the same hemisphere.
    __m128 cs = Dot(q0, q1);
    __m128 negative = _mm_cmplt_ps(cs, ZZZZ);
    __m128 term0 = _mm_and_ps(negative, MMMM);
    __m128 term1 = _mm_andnot_ps(negative, PPPP);
    __m128 sign = _mm_or_ps(term0, term1);
    cs = _mm_mul_ps(cs, sign);
    __m128 csm1 = _mm_sub_ps(cs, PPPP);  // cs - 1

    // coeff = (1-t, t, 0, 0); u accumulates the two interpolation weights.
    __m128 omt = _mm_sub_ps(PPPP, t);
    __m128 temp = _mm_shuffle_ps(omt, t, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 coeff = _mm_shuffle_ps(temp, ZZZZ, _MM_SHUFFLE(0, 0, 2, 0));
    __m128 u = coeff;
    __m128 sqr = _mm_mul_ps(coeff, u);

    // Refinement passes with a[i] = 1/(i*(2*i+1)) and b[i] = i/(2*i+1); the
    // final pass is scaled by onePlusMuFPU.
    __m128 avalue = _mm_set1_ps(1.0f / (1.0f*3.0f));
    __m128 bvalue = _mm_set1_ps(1.0f / 3.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (2.0f*5.0f));
    bvalue = _mm_set1_ps(2.0f / 5.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (3.0f*7.0f));
    bvalue = _mm_set1_ps(3.0f / 7.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (4.0f*9.0f));
    bvalue = _mm_set1_ps(4.0f / 9.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (5.0f*11.0f));
    bvalue = _mm_set1_ps(5.0f / 11.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (6.0f*13.0f));
    bvalue = _mm_set1_ps(6.0f / 13.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (7.0f*15.0f));
    bvalue = _mm_set1_ps(7.0f / 15.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(1.0f / (8.0f*17.0f));
    bvalue = _mm_set1_ps(8.0f / 17.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    avalue = _mm_set1_ps(onePlusMuFPU*1.0f / (9.0f*19.0f));
    bvalue = _mm_set1_ps(onePlusMuFPU*9.0f / 19.0f);
    temp = _mm_mul_ps(avalue, sqr);
    temp = _mm_sub_ps(temp, bvalue);
    temp = _mm_mul_ps(temp, csm1);
    coeff = _mm_mul_ps(coeff, temp);
    u = _mm_add_ps(u, coeff);

    // slerp = u[0]*q0 + u[1]*sign*q1
    term0 = _mm_shuffle_ps(u, u, _MM_SHUFFLE(0, 0, 0, 0));
    term1 = _mm_shuffle_ps(u, u, _MM_SHUFFLE(1, 1, 1, 1));
    term0 = _mm_mul_ps(term0, q0);
    term1 = _mm_mul_ps(term1, q1);
    term1 = _mm_mul_ps(term1, sign);
    __m128 slerp = _mm_add_ps(term0, term1);
    return slerp;
}
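// The update pair u += coeff, coeff *= (a[i]*sqr - b[i])*(cs - 1) expands to
// the series t*(1 + b1(cs-1)(1 + b2(cs-1)(1 + ...))) evaluated for t and
// 1-t simultaneously in lanes 1 and 0; lane 0 of u becomes the q0 weight
// and lane 1 the q1 weight.  The onePlusMuFPU constant is the 1+mu
// correction from Eberly's "A Fast and Accurate Algorithm for Computing
// SLERP", which tunes the truncated series for float evaluation.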
inline __m128 SIMD::Sin(__m128 const v)
{
    __m128 result;
    result.m128_f32[0] = sin(v.m128_f32[0]);
    result.m128_f32[1] = sin(v.m128_f32[1]);
    result.m128_f32[2] = sin(v.m128_f32[2]);
    result.m128_f32[3] = sin(v.m128_f32[3]);
    return result;
}

inline __m128 SIMD::Cos(__m128 const v)
{
    __m128 result;
    result.m128_f32[0] = cos(v.m128_f32[0]);
    result.m128_f32[1] = cos(v.m128_f32[1]);
    result.m128_f32[2] = cos(v.m128_f32[2]);
    result.m128_f32[3] = cos(v.m128_f32[3]);
    return result;
}

inline __m128 SIMD::Tan(__m128 const v)
{
    __m128 result;
    result.m128_f32[0] = tan(v.m128_f32[0]);
    result.m128_f32[1] = tan(v.m128_f32[1]);
    result.m128_f32[2] = tan(v.m128_f32[2]);
    result.m128_f32[3] = tan(v.m128_f32[3]);
    return result;
}

inline __m128 SIMD::ASin(__m128 const v)
{
    __m128 result;
    result.m128_f32[0] = asin(v.m128_f32[0]);
    result.m128_f32[1] = asin(v.m128_f32[1]);
    result.m128_f32[2] = asin(v.m128_f32[2]);
    result.m128_f32[3] = asin(v.m128_f32[3]);
    return result;
}

inline __m128 SIMD::ACos(__m128 const v)
{
    __m128 result;
    result.m128_f32[0] = acos(v.m128_f32[0]);
    result.m128_f32[1] = acos(v.m128_f32[1]);
    result.m128_f32[2] = acos(v.m128_f32[2]);
    result.m128_f32[3] = acos(v.m128_f32[3]);
    return result;
}

inline __m128 SIMD::ATan(__m128 const v)
{
    __m128 result;
    result.m128_f32[0] = atan(v.m128_f32[0]);
    result.m128_f32[1] = atan(v.m128_f32[1]);
    result.m128_f32[2] = atan(v.m128_f32[2]);
    result.m128_f32[3] = atan(v.m128_f32[3]);
    return result;
}
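// Note that these six fallbacks extract lanes through the m128_f32 union
// member, which is a Microsoft compiler extension; GCC and Clang expose the
// lanes differently (e.g. vector subscripting on __m128, or _mm_store_ps to
// a local float[4]), so a portable port would extract and reinsert the
// lanes explicitly.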
inline void SIMD::ReduceAnglesSin(__m128 const x, __m128& y)
{
    // Map x to y in [-pi, pi], x = 2*pi*quotient + remainder.
    __m128 quotient = _mm_mul_ps(x, INV_TWO_PI);
    quotient = Round(quotient);
    y = _mm_mul_ps(quotient, TWO_PI);
    y = _mm_sub_ps(x, y);

    // Map y to [-pi/2, pi/2] using the reflection sin(pi - y) = sin(y).
    __m128 sign = _mm_and_ps(y, SIGN);
    __m128 c = _mm_or_ps(PI, sign);
    __m128 absy = _mm_andnot_ps(sign, y);
    __m128 rflx = _mm_sub_ps(c, y);
    __m128 comp = _mm_cmple_ps(absy, HALF_PI);
    __m128 select0 = _mm_and_ps(comp, y);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    y = _mm_or_ps(select0, select1);
}
inline __m128 SIMD::SinApprDeg11(__m128 const x)
{
    // Evaluate the degree-11 approximation x*p(x^2) by Horner's rule.
    __m128 xsqr = _mm_mul_ps(x, x);
    __m128 poly = _mm_mul_ps(C_SIN_APPR_DEG11_5, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG11_4);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG11_3);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG11_2);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG11_1);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG11_0);
    poly = _mm_mul_ps(poly, x);
    return poly;
}
inline __m128 SIMD::SinApprDeg7(__m128 const x)
{
    __m128 xsqr = _mm_mul_ps(x, x);
    __m128 poly = _mm_mul_ps(C_SIN_APPR_DEG7_3, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG7_2);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG7_1);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_SIN_APPR_DEG7_0);
    poly = _mm_mul_ps(poly, x);
    return poly;
}
inline void SIMD::ReduceAnglesCos(__m128 const x, __m128& y, __m128& sign)
{
    // Map x to y in [-pi, pi], x = 2*pi*quotient + remainder.
    __m128 quotient = _mm_mul_ps(x, INV_TWO_PI);
    quotient = Round(quotient);
    y = _mm_mul_ps(quotient, TWO_PI);
    y = _mm_sub_ps(x, y);

    // Map y to [-pi/2, pi/2] using the reflection cos(pi - y) = -cos(y);
    // sign records the factor to apply after the polynomial evaluation.
    sign = _mm_and_ps(y, SIGN);
    __m128 c = _mm_or_ps(PI, sign);
    __m128 absy = _mm_andnot_ps(sign, y);
    __m128 rflx = _mm_sub_ps(c, y);
    __m128 comp = _mm_cmple_ps(absy, HALF_PI);
    __m128 select0 = _mm_and_ps(comp, y);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    y = _mm_or_ps(select0, select1);
    select0 = _mm_and_ps(comp, PPPP);
    select1 = _mm_andnot_ps(comp, MMMM);
    sign = _mm_or_ps(select0, select1);
}
inline __m128 SIMD::CosApprDeg10(__m128 const x, __m128 const sign)
{
    // Evaluate the degree-10 approximation p(x^2) by Horner's rule and
    // apply the reflection sign from ReduceAnglesCos.
    __m128 xsqr = _mm_mul_ps(x, x);
    __m128 poly = _mm_mul_ps(C_COS_APPR_DEG10_5, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG10_4);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG10_3);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG10_2);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG10_1);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG10_0);
    poly = _mm_mul_ps(poly, sign);
    return poly;
}
inline __m128 SIMD::CosApprDeg6(__m128 const x, __m128 const sign)
{
    __m128 xsqr = _mm_mul_ps(x, x);
    __m128 poly = _mm_mul_ps(C_COS_APPR_DEG6_3, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG6_2);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG6_1);
    poly = _mm_mul_ps(poly, xsqr);
    poly = _mm_add_ps(poly, C_COS_APPR_DEG6_0);
    poly = _mm_mul_ps(poly, sign);
    return poly;
}
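// The C_SIN_APPR_* and C_COS_APPR_* constants hold splatted polynomial
// coefficients.  The reduced angle from ReduceAnglesSin or ReduceAnglesCos
// lies in [-pi/2, pi/2], where the degree-11/degree-10 approximations are
// accurate to roughly float precision and the degree-7/degree-6 variants
// trade some accuracy for two fewer multiply/add pairs per evaluation.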
inline void SIMD::GetAdjDet(__m128 const* mat, __m128* adj, __m128* det)
{
    // Compute the twelve 2x2 subdeterminants: the a-terms a0..a5 and the
    // b-terms b0..b5.
    __m128 t0, t1;

    // a1a2a3a4 = (a1, a2, a3, a4)
    t0 = _mm_shuffle_ps(mat[0], mat[1], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(mat[2], mat[3], _MM_SHUFFLE(1, 1, 1, 1));
    t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 0, 2, 0));
    __m128 a1a2a3a4 = _mm_mul_ps(t0, t1);
    t0 = _mm_shuffle_ps(mat[0], mat[1], _MM_SHUFFLE(1, 1, 1, 1));
    t1 = _mm_shuffle_ps(mat[2], mat[3], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 0, 2, 0));
    t0 = _mm_mul_ps(t0, t1);
    a1a2a3a4 = _mm_sub_ps(a1a2a3a4, t0);

    // b1b2b3b4 = (b1, b2, b3, b4)
    t0 = _mm_shuffle_ps(mat[0], mat[1], _MM_SHUFFLE(2, 2, 2, 2));
    t1 = _mm_shuffle_ps(mat[2], mat[3], _MM_SHUFFLE(3, 3, 3, 3));
    t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 0, 2, 0));
    __m128 b1b2b3b4 = _mm_mul_ps(t0, t1);
    t0 = _mm_shuffle_ps(mat[0], mat[1], _MM_SHUFFLE(3, 3, 3, 3));
    t1 = _mm_shuffle_ps(mat[2], mat[3], _MM_SHUFFLE(2, 2, 2, 2));
    t1 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 0, 2, 0));
    t0 = _mm_mul_ps(t0, t1);
    b1b2b3b4 = _mm_sub_ps(b1b2b3b4, t0);

    // a0b0a5b5 = (a0, b0, a5, b5)
    t0 = _mm_shuffle_ps(mat[0], mat[2], _MM_SHUFFLE(2, 0, 2, 0));
    t1 = _mm_shuffle_ps(mat[1], mat[3], _MM_SHUFFLE(3, 1, 3, 1));
    __m128 a0b0a5b5 = _mm_mul_ps(t0, t1);
    t0 = _mm_shuffle_ps(mat[0], mat[2], _MM_SHUFFLE(3, 1, 3, 1));
    t1 = _mm_shuffle_ps(mat[1], mat[3], _MM_SHUFFLE(2, 0, 2, 0));
    t0 = _mm_mul_ps(t0, t1);
    a0b0a5b5 = _mm_sub_ps(a0b0a5b5, t0);

    // Assemble the first two slices of the adjoint from the b-terms.
    __m128 slice0, slice1;
    __m128 b5b5b4b3 = _mm_shuffle_ps(a0b0a5b5, b1b2b3b4, _MM_SHUFFLE(2, 3, 3, 3));
    __m128 b4b2b2b1 = _mm_shuffle_ps(b1b2b3b4, b1b2b3b4, _MM_SHUFFLE(0, 1, 1, 3));
    __m128 b3b1b0b0 = _mm_shuffle_ps(b1b2b3b4, a0b0a5b5, _MM_SHUFFLE(1, 1, 0, 2));

    t0 = _mm_shuffle_ps(mat[1], mat[0], _MM_SHUFFLE(1, 1, 1, 1));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 0));
    slice0 = _mm_mul_ps(t1, b5b5b4b3);
    t0 = _mm_shuffle_ps(mat[2], mat[1], _MM_SHUFFLE(1, 1, 1, 1));
    t1 = _mm_mul_ps(t0, b4b2b2b1);
    slice0 = _mm_sub_ps(slice0, t1);
    t0 = _mm_shuffle_ps(mat[3], mat[2], _MM_SHUFFLE(1, 1, 1, 1));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 0, 0, 0));
    t0 = _mm_mul_ps(t1, b3b1b0b0);
    slice0 = _mm_add_ps(slice0, t0);
    slice0 = _mm_mul_ps(slice0, PMPM);

    t0 = _mm_shuffle_ps(mat[1], mat[0], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 0));
    slice1 = _mm_mul_ps(t1, b5b5b4b3);
    t0 = _mm_shuffle_ps(mat[2], mat[1], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_mul_ps(t0, b4b2b2b1);
    slice1 = _mm_sub_ps(slice1, t1);
    t0 = _mm_shuffle_ps(mat[3], mat[2], _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 0, 0, 0));
    t0 = _mm_mul_ps(t1, b3b1b0b0);
    slice1 = _mm_add_ps(slice1, t0);
    slice1 = _mm_mul_ps(slice1, MPMP);

    // Assemble the last two slices of the adjoint from the a-terms.
    __m128 slice2, slice3;
    __m128 a5a5a4a3 = _mm_shuffle_ps(a0b0a5b5, a1a2a3a4, _MM_SHUFFLE(2, 3, 2, 2));
    __m128 a4a2a2a1 = _mm_shuffle_ps(a1a2a3a4, a1a2a3a4, _MM_SHUFFLE(0, 1, 1, 3));
    __m128 a3a1a0a0 = _mm_shuffle_ps(a1a2a3a4, a0b0a5b5, _MM_SHUFFLE(0, 0, 0, 2));

    t0 = _mm_shuffle_ps(mat[1], mat[0], _MM_SHUFFLE(3, 3, 3, 3));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 0));
    slice2 = _mm_mul_ps(t1, a5a5a4a3);
    t0 = _mm_shuffle_ps(mat[2], mat[1], _MM_SHUFFLE(3, 3, 3, 3));
    t1 = _mm_mul_ps(t0, a4a2a2a1);
    slice2 = _mm_sub_ps(slice2, t1);
    t0 = _mm_shuffle_ps(mat[3], mat[2], _MM_SHUFFLE(3, 3, 3, 3));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 0, 0, 0));
    t0 = _mm_mul_ps(t1, a3a1a0a0);
    slice2 = _mm_add_ps(slice2, t0);
    slice2 = _mm_mul_ps(slice2, PMPM);

    t0 = _mm_shuffle_ps(mat[1], mat[0], _MM_SHUFFLE(2, 2, 2, 2));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 0));
    slice3 = _mm_mul_ps(t1, a5a5a4a3);
    t0 = _mm_shuffle_ps(mat[2], mat[1], _MM_SHUFFLE(2, 2, 2, 2));
    t1 = _mm_mul_ps(t0, a4a2a2a1);
    slice3 = _mm_sub_ps(slice3, t1);
    t0 = _mm_shuffle_ps(mat[3], mat[2], _MM_SHUFFLE(2, 2, 2, 2));
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 0, 0, 0));
    t0 = _mm_mul_ps(t1, a3a1a0a0);
    slice3 = _mm_add_ps(slice3, t0);
    slice3 = _mm_mul_ps(slice3, MPMP);

    // Store the adjoint (the determinant expansion below consumes lane 0 of
    // each slice as adj[j][0]; the direct stores are an assumption of this
    // listing).
    adj[0] = slice0;
    adj[1] = slice1;
    adj[2] = slice2;
    adj[3] = slice3;
    // Determinant, formulation 1: the dot product of mat[0] with the lane-0
    // entries of the four slices.
    t0 = _mm_shuffle_ps(slice0, slice1, _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(slice2, slice3, _MM_SHUFFLE(0, 0, 0, 0));
    t1 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2, 0, 2, 0));
    t0 = _mm_mul_ps(mat[0], t1);
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 3, 0, 1));
    t0 = _mm_add_ps(t0, t1);
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 2, 2));
    *det = _mm_add_ps(t0, t1);

    // Determinant, formulation 2 (equivalent): the closed-form cofactor
    // combination a0*b5 + a5*b0 + a2*b3 + a3*b2 - a1*b4 - a4*b1.
    t0 = _mm_shuffle_ps(a0b0a5b5, a1a2a3a4, _MM_SHUFFLE(2, 1, 2, 0));
    t1 = _mm_shuffle_ps(a0b0a5b5, b1b2b3b4, _MM_SHUFFLE(1, 2, 1, 3));
    t0 = _mm_mul_ps(t0, t1);
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 3, 0, 1));
    t0 = _mm_add_ps(t0, t1);
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 2, 2));
    __m128 dot0 = _mm_add_ps(t0, t1);
    t0 = _mm_shuffle_ps(a1a2a3a4, a1a2a3a4, _MM_SHUFFLE(3, 0, 3, 0));
    t1 = _mm_shuffle_ps(b1b2b3b4, b1b2b3b4, _MM_SHUFFLE(0, 3, 0, 3));
    t0 = _mm_mul_ps(t0, t1);
    t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 1, 0, 1));
    __m128 dot1 = _mm_add_ps(t0, t1);
    *det = _mm_sub_ps(dot0, dot1);
}
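// GetAdjDet evaluates the twelve 2x2 subdeterminants once (the a- and
// b-terms) and reuses them for both the adjoint slices and the closed-form
// determinant det = a0*b5 - a1*b4 + a2*b3 + a3*b2 - a4*b1 + a5*b0, the
// standard expansion used for 4x4 inverses.  Inverse above then scales the
// adjoint by 1/det, zeroing the result when the matrix is singular.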