00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
#if ! defined(VL_MATHOP_AVX_INSTANTIATING)

/* Template instantiation by self-inclusion: on the first pass (the
   instantiation macro is undefined) this file includes ITSELF twice,
   once with FLT = VL_TYPE_DOUBLE and once with FLT = VL_TYPE_FLOAT.
   Each pass takes the #else branch below and compiles the AVX kernels
   for that element type (float.th presumably maps FLT to the T/SFX/
   VTYPEavx/... macros used by the kernels). */

#include "mathop_avx.h"

#undef FLT
#define FLT VL_TYPE_DOUBLE
#define VL_MATHOP_AVX_INSTANTIATING
#include "mathop_avx.c"

#undef FLT
#define FLT VL_TYPE_FLOAT
#define VL_MATHOP_AVX_INSTANTIATING
#include "mathop_avx.c"

#else
/* Second pass: actually compile the kernels, unless AVX is disabled. */
#ifndef VL_DISABLE_AVX

#ifndef __AVX__
#error Compiling AVX functions but AVX does not seem to be supported by the compiler.
#endif

#include <immintrin.h>
#include "generic.h"
#include "mathop.h"
#include "float.th"
00042
/* Horizontal sum: reduce the VSIZEavx lanes of the AVX register @a x
   to a single scalar of type T and return it.
   NOTE(review): the two branches handle the two element widths — the
   VSIZEavx == 8 case is presumably the float instantiation (8 x 32-bit
   lanes in a 256-bit register), the other the double one; confirm
   against float.th. */
VL_INLINE T
VL_XCAT(_vl_vhsum_avx_, SFX)(VTYPEavx x)
{
  T acc ;
#if (VSIZEavx == 8)
  {
    /* Pairwise horizontal add, then fold the two 128-bit halves
       together (VPERMavx with selector 0x1 swaps them), and finish
       with one more horizontal add on the low half before storing
       the scalar result. */
    VTYPEavx hsum = VHADD2avx(x, x);
    hsum = VADDavx(hsum, VPERMavx(hsum, hsum, 0x1));
    VST1(&acc, VHADDavx( VCSTavx(hsum), VCSTavx(hsum) ) );
  }
#else
  {
    /* Four lanes: add the swapped halves, then one horizontal add
       collapses the remaining pair into the scalar sum. */
    VTYPEavx hsum = VADDavx(x, VPERMavx(x, x, 0x1));
    VST1(&acc, VHADDavx( VCSTavx(hsum), VCSTavx(hsum) ) );
  }
#endif
  return acc ;
}
00067
00068 VL_EXPORT T
00069 VL_XCAT(_vl_distance_l2_avx_, SFX)
00070 (vl_size dimension, T const * X, T const * Y)
00071 {
00072
00073 T const * X_end = X + dimension ;
00074 T const * X_vec_end = X_end - VSIZEavx + 1 ;
00075 T acc ;
00076 VTYPEavx vacc = VSTZavx() ;
00077 vl_bool dataAligned = VALIGNEDavx(X) & VALIGNEDavx(Y) ;
00078
00079 if (dataAligned) {
00080 while (X < X_vec_end) {
00081 VTYPEavx a = *(VTYPEavx*)X ;
00082 VTYPEavx b = *(VTYPEavx*)Y ;
00083 VTYPEavx delta = VSUBavx(a, b) ;
00084 VTYPEavx delta2 = VMULavx(delta, delta) ;
00085 vacc = VADDavx(vacc, delta2) ;
00086 X += VSIZEavx ;
00087 Y += VSIZEavx ;
00088 }
00089 } else {
00090 while (X < X_vec_end) {
00091 VTYPEavx a = VLDUavx(X) ;
00092 VTYPEavx b = VLDUavx(Y) ;
00093 VTYPEavx delta = VSUBavx(a, b) ;
00094 VTYPEavx delta2 = VMULavx(delta, delta) ;
00095 vacc = VADDavx(vacc, delta2) ;
00096 X += VSIZEavx ;
00097 Y += VSIZEavx ;
00098 }
00099 }
00100
00101 acc = VL_XCAT(_vl_vhsum_avx_, SFX)(vacc) ;
00102
00103 while (X < X_end) {
00104 T a = *X++ ;
00105 T b = *Y++ ;
00106 T delta = a - b ;
00107 acc += delta * delta ;
00108 }
00109
00110 return acc ;
00111 }
00112
00113 VL_EXPORT T
00114 VL_XCAT(_vl_distance_mahalanobis_sq_avx_, SFX)
00115 (vl_size dimension, T const * X, T const * MU, T const * S)
00116 {
00117 T const * X_end = X + dimension ;
00118 T const * X_vec_end = X_end - VSIZEavx + 1 ;
00119 T acc ;
00120 VTYPEavx vacc = VSTZavx() ;
00121 vl_bool dataAligned = VALIGNEDavx(X) & VALIGNEDavx(MU) & VALIGNEDavx(S);
00122
00123 if (dataAligned) {
00124 while (X < X_vec_end) {
00125 VTYPEavx a = *(VTYPEavx*)X ;
00126 VTYPEavx b = *(VTYPEavx*)MU ;
00127 VTYPEavx c = *(VTYPEavx*)S ;
00128
00129 VTYPEavx delta = VSUBavx(a, b) ;
00130 VTYPEavx delta2 = VMULavx(delta, delta) ;
00131 VTYPEavx delta2div = VMULavx(delta2,c);
00132
00133 vacc = VADDavx(vacc, delta2div) ;
00134
00135 X += VSIZEavx ;
00136 MU += VSIZEavx ;
00137 S += VSIZEavx ;
00138 }
00139 } else {
00140 while (X < X_vec_end) {
00141
00142 VTYPEavx a = VLDUavx(X) ;
00143 VTYPEavx b = VLDUavx(MU) ;
00144 VTYPEavx c = VLDUavx(S) ;
00145
00146 VTYPEavx delta = VSUBavx(a, b) ;
00147 VTYPEavx delta2 = VMULavx(delta, delta) ;
00148 VTYPEavx delta2div = VMULavx(delta2,c);
00149
00150 vacc = VADDavx(vacc, delta2div) ;
00151
00152 X += VSIZEavx ;
00153 MU += VSIZEavx ;
00154 S += VSIZEavx ;
00155 }
00156 }
00157
00158 acc = VL_XCAT(_vl_vhsum_avx_, SFX)(vacc) ;
00159
00160 while (X < X_end) {
00161 T a = *X++ ;
00162 T b = *MU++ ;
00163 T c = *S++ ;
00164 T delta = a - b ;
00165 acc += (delta * delta) * c;
00166 }
00167
00168 return acc ;
00169 }
00170
00171 VL_EXPORT void
00172 VL_XCAT(_vl_weighted_mean_avx_, SFX)
00173 (vl_size dimension, T * MU, T const * X, T const W)
00174 {
00175 T const * X_end = X + dimension ;
00176 T const * X_vec_end = X_end - VSIZEavx + 1 ;
00177
00178 vl_bool dataAligned = VALIGNEDavx(X) & VALIGNEDavx(MU);
00179 VTYPEavx w = VLD1avx (&W) ;
00180
00181 if (dataAligned) {
00182 while (X < X_vec_end) {
00183 VTYPEavx a = *(VTYPEavx*)X ;
00184 VTYPEavx mu = *(VTYPEavx*)MU ;
00185
00186 VTYPEavx aw = VMULavx(a, w) ;
00187 VTYPEavx meanStore = VADDavx(aw, mu);
00188
00189 *(VTYPEavx *)MU = meanStore;
00190
00191 X += VSIZEavx ;
00192 MU += VSIZEavx ;
00193 }
00194 } else {
00195 while (X < X_vec_end) {
00196 VTYPEavx a = VLDUavx(X) ;
00197 VTYPEavx mu = VLDUavx(MU) ;
00198
00199 VTYPEavx aw = VMULavx(a, w) ;
00200 VTYPEavx meanStore = VADDavx(aw, mu);
00201
00202 VST2Uavx(MU,meanStore);
00203
00204 X += VSIZEavx ;
00205 MU += VSIZEavx ;
00206 }
00207 }
00208
00209 while (X < X_end) {
00210 T a = *X++ ;
00211 *MU += a * W ;
00212 MU++;
00213 }
00214 }
00215
00216 VL_EXPORT void
00217 VL_XCAT(_vl_weighted_sigma_avx_, SFX)
00218 (vl_size dimension, T * S, T const * X, T const * Y, T const W)
00219 {
00220 T const * X_end = X + dimension ;
00221 T const * X_vec_end = X_end - VSIZEavx + 1 ;
00222
00223 vl_bool dataAligned = VALIGNEDavx(X) & VALIGNEDavx(Y) & VALIGNEDavx(S);
00224
00225 VTYPEavx w = VLD1avx (&W) ;
00226
00227 if (dataAligned) {
00228 while (X < X_vec_end) {
00229 VTYPEavx a = *(VTYPEavx*)X ;
00230 VTYPEavx b = *(VTYPEavx*)Y ;
00231 VTYPEavx s = *(VTYPEavx*)S ;
00232
00233 VTYPEavx delta = VSUBavx(a, b) ;
00234 VTYPEavx delta2 = VMULavx(delta, delta) ;
00235 VTYPEavx delta2w = VMULavx(delta2, w) ;
00236 VTYPEavx sigmaStore = VADDavx(s,delta2w);
00237
00238 *(VTYPEavx *)S = sigmaStore;
00239
00240 X += VSIZEavx ;
00241 Y += VSIZEavx ;
00242 S += VSIZEavx ;
00243 }
00244 } else {
00245 while (X < X_vec_end) {
00246 VTYPEavx a = VLDUavx(X) ;
00247 VTYPEavx b = VLDUavx(Y) ;
00248 VTYPEavx s = VLDUavx(S) ;
00249
00250 VTYPEavx delta = VSUBavx(a, b) ;
00251 VTYPEavx delta2 = VMULavx(delta, delta) ;
00252 VTYPEavx delta2w = VMULavx(delta2, w) ;
00253 VTYPEavx sigmaStore = VADDavx(s,delta2w);
00254
00255 VST2Uavx(S,sigmaStore);
00256
00257 X += VSIZEavx ;
00258 Y += VSIZEavx ;
00259 S += VSIZEavx ;
00260 }
00261 }
00262
00263 while (X < X_end) {
00264 T a = *X++ ;
00265 T b = *Y++ ;
00266 T delta = a - b ;
00267 *S += ((delta * delta)*W) ;
00268 S++;
00269 }
00270 }
00271
00272
00273 #endif
00274 #undef VL_MATHOP_AVX_INSTANTIATING
00275 #endif