00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00037 #ifndef FCL_MULTIPLE_INTERSECT
00038 #define FCL_MULTIPLE_INTERSECT
00039
00040 #include "fcl/math/vec_3f.h"
00041
00042 #include <xmmintrin.h>
00043 #include <pmmintrin.h>
00044
00045
00046 namespace fcl
00047 {
00048
00049
00050 static inline __m128 sse_four_spheres_intersect(const Vec3f& o1, FCL_REAL r1,
00051 const Vec3f& o2, FCL_REAL r2,
00052 const Vec3f& o3, FCL_REAL r3,
00053 const Vec3f& o4, FCL_REAL r4,
00054 const Vec3f& o5, FCL_REAL r5,
00055 const Vec3f& o6, FCL_REAL r6,
00056 const Vec3f& o7, FCL_REAL r7,
00057 const Vec3f& o8, FCL_REAL r8)
00058 {
00059 __m128 PX, PY, PZ, PR, QX, QY, QZ, QR;
00060 PX = _mm_set_ps(o1[0], o2[0], o3[0], o4[0]);
00061 PY = _mm_set_ps(o1[1], o2[1], o3[1], o4[1]);
00062 PZ = _mm_set_ps(o1[2], o2[2], o3[2], o4[2]);
00063 PR = _mm_set_ps(r1, r2, r3, r4);
00064 QX = _mm_set_ps(o5[0], o6[0], o7[0], o8[0]);
00065 QY = _mm_set_ps(o5[1], o6[1], o7[1], o8[1]);
00066 QZ = _mm_set_ps(o5[2], o6[2], o7[2], o8[2]);
00067 QR = _mm_set_ps(r5, r6, r7, r8);
00068
00069 __m128 T1 = _mm_sub_ps(PX, QX);
00070 __m128 T2 = _mm_sub_ps(PY, QY);
00071 __m128 T3 = _mm_sub_ps(PZ, QZ);
00072 __m128 T4 = _mm_add_ps(PR, QR);
00073 T1 = _mm_mul_ps(T1, T1);
00074 T2 = _mm_mul_ps(T2, T2);
00075 T3 = _mm_mul_ps(T3, T3);
00076 T4 = _mm_mul_ps(T4, T4);
00077 T1 = _mm_add_ps(T1, T2);
00078 T2 = _mm_sub_ps(R2, T3);
00079 return _mm_cmple_ps(T1, T2);
00080 }
00081
00082
00083 static inline __m128 sse_four_spheres_four_AABBs_intersect(const Vec3f& o1, FCL_REAL r1,
00084 const Vec3f& o2, FCL_REAL r2,
00085 const Vec3f& o3, FCL_REAL r3,
00086 const Vec3f& o4, FCL_REAL r4,
00087 const Vec3f& min1, const Vec3f& max1,
00088 const Vec3f& min2, const Vec3f& max2,
00089 const Vec3f& min3, const Vec3f& max3,
00090 const Vec3f& min4, const Vec3f& max4)
00091 {
00092 __m128 MINX, MINY, MINZ, MAXX, MAXX, MAXY, MAXZ, SX, SY, SZ, SR;
00093 MINX = _mm_set_ps(min1[0], min2[0], min3[0], min4[0]);
00094 MAXX = _mm_set_ps(max1[0], max2[0], max3[0], max4[0]);
00095 MINY = _mm_set_ps(min1[1], min2[1], min3[1], min4[1]);
00096 MAXY = _mm_set_ps(max1[1], max2[1], max3[1], max4[1]);
00097 MINZ = _mm_set_ps(min1[2], min2[2], min3[2], min4[2]);
00098 MAXZ = _mm_set_ps(max1[2], max2[2], max3[2], max4[2]);
00099 SX = _mm_set_ps(o1[0], o2[0], o3[0], o4[0]);
00100 SY = _mm_set_ps(o1[1], o2[1], o3[1], o4[1]);
00101 SZ = _mm_set_ps(o1[2], o2[2], o3[2], o4[2]);
00102 SR = _mm_set_ps(r1, r2, r3, r4);
00103
00104
00105 __m128 TX = _mm_max_ps(SX, MINX);
00106 __m128 TY = _mm_max_ps(SY, MINY);
00107 __m128 TZ = _mm_max_ps(SZ, MINZ);
00108 TX = _mm_min_ps(TX, MAXX);
00109 TY = _mm_min_ps(TY, MAXY);
00110 TZ = _mm_min_ps(TZ, MAXZ);
00111 __m128 DX = _mm_sub_ps(SX, TX);
00112 __m128 DY = _mm_sub_ps(SY, TY);
00113 __m128 DZ = _mm_sub_ps(SZ, TZ);
00114 __m128 R2 = _mm_mul_ps(SR, SR);
00115 DX = _mm_mul_ps(DX, DX);
00116 DY = _mm_mul_ps(DY, DY);
00117 DZ = _mm_mul_ps(DZ, DZ);
00118 __m128 T1 = _mm_add_ps(DX, DY);
00119 __m128 T2 = _mm_sub_ps(R2, DZ);
00120 return _mm_cmple_ps(T1, T2);
00121 }
00122
00123
00124 static inline __m128 sse_four_AABBs_intersect(const Vec3f& min1, const Vec3f& max1,
00125 const Vec3f& min2, const Vec3f& max2,
00126 const Vec3f& min3, const Vec3f& max3,
00127 const Vec3f& min4, const Vec3f& max4,
00128 const Vec3f& min5, const Vec3f& max5,
00129 const Vec3f& min6, const Vec3f& max6,
00130 const Vec3f& min7, const Vec3f& max7,
00131 const Vec3f& min8, const Vec3f& max8)
00132 {
00133 __m128 MIN1X, MIN1Y, MIN1Z, MAX1X, MAX1Y, MAX1Z, MIN2X, MIN2Y, MIN2Z, MAX2X, MAX2Y, MAX2Z;
00134 MIN1X = _mm_set_ps(min1[0], min2[0], min3[0], min4[0]);
00135 MAX1X = _mm_set_ps(max1[0], max2[0], max3[0], max4[0]);
00136 MIN1Y = _mm_set_ps(min1[1], min2[1], min3[1], min4[1]);
00137 MAX1Y = _mm_set_ps(max1[1], max2[1], max3[1], max4[1]);
00138 MIN1Z = _mm_set_ps(min1[2], min2[2], min3[2], min4[2]);
00139 MAX1Z = _mm_set_ps(max1[2], max2[2], max3[2], max4[2]);
00140 MIN2X = _mm_set_ps(min5[0], min6[0], min7[0], min8[0]);
00141 MAX2X = _mm_set_ps(max5[0], max6[0], max7[0], max8[0]);
00142 MIN2Y = _mm_set_ps(min5[1], min6[1], min7[1], min8[1]);
00143 MAX2Y = _mm_set_ps(max5[1], max6[1], max7[1], max8[1]);
00144 MIN2Z = _mm_set_ps(min5[2], min6[2], min7[2], min8[2]);
00145 MAX2Z = _mm_set_ps(max5[2], max6[2], max7[2], max8[2]);
00146
00147 __m128 AX = _mm_max_ps(MIN1X, MIN2X);
00148 __m128 BX = _mm_min_ps(MAX1X, MAX2X);
00149 __m128 AY = _mm_max_ps(MIN1Y, MIN2Y);
00150 __m128 BY = _mm_min_ps(MAX1Y, MAX2Y);
00151 __m128 AZ = _mm_max_ps(MIN1Z, MIN2Z);
00152 __m128 BZ = _mm_min_ps(MAX1Z, MAX2Z);
00153 __m128 T1 = _mm_cmple_ps(AX, BX);
00154 __m128 T2 = _mm_cmple_ps(AY, BY);
00155 __m128 T3 = _mm_cmple_ps(AZ, BZ);
00156 __m128 T4 = _mm_and_ps(T1, T2);
00157 return _mm_and_ps(T3, T4);
00158 }
00159
00160 static bool four_spheres_intersect_and(const Vec3f& o1, FCL_REAL r1,
00161 const Vec3f& o2, FCL_REAL r2,
00162 const Vec3f& o3, FCL_REAL r3,
00163 const Vec3f& o4, FCL_REAL r4,
00164 const Vec3f& o5, FCL_REAL r5,
00165 const Vec3f& o6, FCL_REAL r6,
00166 const Vec3f& o7, FCL_REAL r7,
00167 const Vec3f& o8, FCL_REAL r8)
00168 {
00169 __m128 CMP = four_spheres_intersect(o1, r1, o2, r2, o3, r3, o4, r4, o5, r5, o6, r6, o7, r7, o8, r8);
00170 return _mm_movemask_ps(CMP) == 15.f;
00171 }
00172
00173 static bool four_spheres_intersect_or(const Vec3f& o1, FCL_REAL r1,
00174 const Vec3f& o2, FCL_REAL r2,
00175 const Vec3f& o3, FCL_REAL r3,
00176 const Vec3f& o4, FCL_REAL r4,
00177 const Vec3f& o5, FCL_REAL r5,
00178 const Vec3f& o6, FCL_REAL r6,
00179 const Vec3f& o7, FCL_REAL r7,
00180 const Vec3f& o8, FCL_REAL r8)
00181 {
00182 __m128 CMP = four_spheres_intersect(o1, r1, o2, r2, o3, r3, o4, r4, o5, r5, o6, r6, o7, r7, o8, r8);
00183 return __mm_movemask_ps(CMP) > 0;
00184 }
00185
00187 static bool four_spheres_four_AABBs_intersect_and(const Vec3f& o1, FCL_REAL r1,
00188 const Vec3f& o2, FCL_REAL r2,
00189 const Vec3f& o3, FCL_REAL r3,
00190 const Vec3f& o4, FCL_REAL r4,
00191 const Vec3f& min1, const Vec3f& max1,
00192 const Vec3f& min2, const Vec3f& max2,
00193 const Vec3f& min3, const Vec3f& max3,
00194 const Vec3f& min4, const Vec3f& max4)
00195 {
00196 __m128 CMP = four_spheres_four_AABBs_intersect(o1, r1, o2, r2, o3, r3, o4, r4, min1, max1, min2, max2, min3, max3, min4, max4);
00197 return _mm_movemask_ps(CMP) == 15.f;
00198 }
00199
00200 static bool four_spheres_four_AABBs_intersect_or(const Vec3f& o1, FCL_REAL r1,
00201 const Vec3f& o2, FCL_REAL r2,
00202 const Vec3f& o3, FCL_REAL r3,
00203 const Vec3f& o4, FCL_REAL r4,
00204 const Vec3f& min1, const Vec3f& max1,
00205 const Vec3f& min2, const Vec3f& max2,
00206 const Vec3f& min3, const Vec3f& max3,
00207 const Vec3f& min4, const Vec3f& max4)
00208 {
00209 __m128 CMP = four_spheres_four_AABBs_intersect(o1, r1, o2, r2, o3, r3, o4, r4, min1, max1, min2, max2, min3, max3, min4, max4);
00210 return _mm_movemask_ps(CMP) > 0;
00211 }
00212
00214 static bool four_AABBs_intersect_and(const Vec3f& min1, const Vec3f& max1,
00215 const Vec3f& min2, const Vec3f& max2,
00216 const Vec3f& min3, const Vec3f& max3,
00217 const Vec3f& min4, const Vec3f& max4,
00218 const Vec3f& min5, const Vec3f& max5,
00219 const Vec3f& min6, const Vec3f& max6,
00220 const Vec3f& min7, const Vec3f& max7,
00221 const Vec3f& min8, const Vec3f& max8)
00222 {
00223 __m128 CMP = four_AABBs_intersect(min1, max1, min2, max2, min3, max3, min4, max4, min5, max5, min6, max6, min7, max7, min8, max8);
00224 return _mm_movemask_ps(CMP) == 15.f;
00225 }
00226
00227 static bool four_AABBs_intersect_or(const Vec3f& min1, const Vec3f& max1,
00228 const Vec3f& min2, const Vec3f& max2,
00229 const Vec3f& min3, const Vec3f& max3,
00230 const Vec3f& min4, const Vec3f& max4,
00231 const Vec3f& min5, const Vec3f& max5,
00232 const Vec3f& min6, const Vec3f& max6,
00233 const Vec3f& min7, const Vec3f& max7,
00234 const Vec3f& min8, const Vec3f& max8)
00235 {
00236 __m128 CMP = four_AABBs_intersect(min1, max1, min2, max2, min3, max3, min4, max4, min5, max5, min6, max6, min7, max7, min8, max8);
00237 return _mm_movemask_ps(CMP) > 0;
00238 }
00239
00240 }
00241
00242 #endif