00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef EIGEN_PACKET_MATH_ALTIVEC_H
00011 #define EIGEN_PACKET_MATH_ALTIVEC_H
00012
00013 namespace Eigen {
00014
00015 namespace internal {
00016
// Architecture defaults. Each value is only set here when it has not already
// been defined before this header is processed.

// Default threshold used to select the cache-friendly product path.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#  define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif

// Signals that a fused conjugate multiply-add is available (pmadd below maps
// onto vec_madd for float packets).
#if !defined(EIGEN_HAS_FUSE_CJMADD)
#  define EIGEN_HAS_FUSE_CJMADD 1
#endif

// Default number of registers assumed on this architecture.
#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#  define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
#endif
00029
00030 typedef __vector float Packet4f;
00031 typedef __vector int Packet4i;
00032 typedef __vector unsigned int Packet4ui;
00033 typedef __vector __bool int Packet4bi;
00034 typedef __vector short int Packet8i;
00035 typedef __vector unsigned char Packet16uc;
00036
00037
00038
00039
// Helper macros declaring packet constants named p4f_<NAME> / p4i_<NAME>.
//
// The *_FAST_* variants use vec_splat_s32, which takes a small immediate
// (NOTE(review): the AltiVec immediate range is -16..15 - confirm against the
// intrinsic reference). For the float variant the integer splat is only
// reinterpreted bit-wise, so X is a bit pattern and not a float value.
#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
  Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)

#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
  Packet4i p4i_##NAME = vec_splat_s32(X)

// General variants: broadcast an arbitrary scalar through pset1.
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
  Packet4f p4f_##NAME = pset1<Packet4f>(X)

// Float packet whose four lanes carry the bit pattern of the integer X.
// The previous definition used the ARM NEON intrinsic vreinterpretq_f32_u32
// together with pset1<int>, neither of which is valid in this AltiVec
// backend; broadcast as an int packet and reinterpret the bits instead.
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
  Packet4f p4f_##NAME = (Packet4f) pset1<Packet4i>(X)

#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
  Packet4i p4i_##NAME = pset1<Packet4i>(X)
00054
// Data-stream channel passed to vec_dstt by the prefetch specializations.
#define DST_CHAN 1
// Packs the three data-stream parameters into one control word:
// size in bits 24 and up, count in bits 16 and up, stride in the low bits.
#define DST_CTRL(size, count, stride) \
  ((stride) | ((count) << 16) | ((size) << 24))
00057
00058
00059 static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
00060 static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
00061 static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
00062 static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
00063 static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
00064
00065 static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
00066 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
00067 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
00068 static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
00069 static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
00070 static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
00071 static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
00072
00073 template<> struct packet_traits<float> : default_packet_traits
00074 {
00075 typedef Packet4f type;
00076 enum {
00077 Vectorizable = 1,
00078 AlignedOnScalar = 1,
00079 size=4,
00080
00081
00082 HasSin = 0,
00083 HasCos = 0,
00084 HasLog = 0,
00085 HasExp = 0,
00086 HasSqrt = 0
00087 };
00088 };
00089 template<> struct packet_traits<int> : default_packet_traits
00090 {
00091 typedef Packet4i type;
00092 enum {
00093
00094 Vectorizable = 1,
00095 AlignedOnScalar = 1,
00096 size=4
00097 };
00098 };
00099
00100 template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
00101 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
00148
00149 float EIGEN_ALIGN16 af[4];
00150 af[0] = from;
00151 Packet4f vc = vec_ld(0, af);
00152 vc = vec_splat(vc, 0);
00153 return vc;
00154 }
00155
00156 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
00157 int EIGEN_ALIGN16 ai[4];
00158 ai[0] = from;
00159 Packet4i vc = vec_ld(0, ai);
00160 vc = vec_splat(vc, 0);
00161 return vc;
00162 }
00163
00164 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
00165 template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
00166
00167 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
00168 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
00169
00170 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
00171 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
00172
00173 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); }
00174 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); }
00175
00176 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); }
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
00214 {
00215 Packet4f t, y_0, y_1, res;
00216
00217
00218 y_0 = vec_re(b);
00219
00220
00221 t = vec_nmsub(y_0, b, p4f_ONE);
00222 y_1 = vec_madd(y_0, t, y_0);
00223
00224 res = vec_madd(a, y_1, p4f_ZERO);
00225 return res;
00226 }
00227
00228 template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
00229 { eigen_assert(false && "packet integer division are not supported by AltiVec");
00230 return pset1<Packet4i>(0);
00231 }
00232
00233
00234 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
00235 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
00236
00237 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
00238 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
00239
00240 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
00241 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
00242
00243
00244 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
00245 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
00246
00247 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
00248 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
00249
00250 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
00251 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
00252
00253 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
00254 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
00255
00256 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
00257 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
00258
00259 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
00260 {
00261 EIGEN_DEBUG_ALIGNED_LOAD
00262
00263 Packet16uc MSQ, LSQ;
00264 Packet16uc mask;
00265 MSQ = vec_ld(0, (unsigned char *)from);
00266 LSQ = vec_ld(15, (unsigned char *)from);
00267 mask = vec_lvsl(0, from);
00268 return (Packet4f) vec_perm(MSQ, LSQ, mask);
00269
00270 }
00271 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
00272 {
00273 EIGEN_DEBUG_ALIGNED_LOAD
00274
00275 Packet16uc MSQ, LSQ;
00276 Packet16uc mask;
00277 MSQ = vec_ld(0, (unsigned char *)from);
00278 LSQ = vec_ld(15, (unsigned char *)from);
00279 mask = vec_lvsl(0, from);
00280 return (Packet4i) vec_perm(MSQ, LSQ, mask);
00281 }
00282
00283 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
00284 {
00285 Packet4f p;
00286 if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4f>(from);
00287 else p = ploadu<Packet4f>(from);
00288 return vec_perm(p, p, p16uc_DUPLICATE);
00289 }
00290 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
00291 {
00292 Packet4i p;
00293 if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4i>(from);
00294 else p = ploadu<Packet4i>(from);
00295 return vec_perm(p, p, p16uc_DUPLICATE);
00296 }
00297
00298 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
00299 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
00300
00301 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
00302 {
00303 EIGEN_DEBUG_UNALIGNED_STORE
00304
00305
00306 Packet16uc MSQ, LSQ, edges;
00307 Packet16uc edgeAlign, align;
00308
00309 MSQ = vec_ld(0, (unsigned char *)to);
00310 LSQ = vec_ld(15, (unsigned char *)to);
00311 edgeAlign = vec_lvsl(0, to);
00312 edges=vec_perm(LSQ,MSQ,edgeAlign);
00313 align = vec_lvsr( 0, to );
00314 MSQ = vec_perm(edges,(Packet16uc)from,align);
00315 LSQ = vec_perm((Packet16uc)from,edges,align);
00316 vec_st( LSQ, 15, (unsigned char *)to );
00317 vec_st( MSQ, 0, (unsigned char *)to );
00318 }
00319 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
00320 {
00321 EIGEN_DEBUG_UNALIGNED_STORE
00322
00323
00324 Packet16uc MSQ, LSQ, edges;
00325 Packet16uc edgeAlign, align;
00326
00327 MSQ = vec_ld(0, (unsigned char *)to);
00328 LSQ = vec_ld(15, (unsigned char *)to);
00329 edgeAlign = vec_lvsl(0, to);
00330 edges=vec_perm(LSQ, MSQ, edgeAlign);
00331 align = vec_lvsr( 0, to );
00332 MSQ = vec_perm(edges, (Packet16uc) from, align);
00333 LSQ = vec_perm((Packet16uc) from, edges, align);
00334 vec_st( LSQ, 15, (unsigned char *)to );
00335 vec_st( MSQ, 0, (unsigned char *)to );
00336 }
00337
00338 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
00339 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
00340
00341 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
00342 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
00343
00344 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
00345 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
00346
00347 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
00348 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
00349
00350 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
00351 {
00352 Packet4f b, sum;
00353 b = (Packet4f) vec_sld(a, a, 8);
00354 sum = vec_add(a, b);
00355 b = (Packet4f) vec_sld(sum, sum, 4);
00356 sum = vec_add(sum, b);
00357 return pfirst(sum);
00358 }
00359
00360 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
00361 {
00362 Packet4f v[4], sum[4];
00363
00364
00365
00366
00367 v[0] = vec_mergeh(vecs[0], vecs[2]);
00368 v[1] = vec_mergel(vecs[0], vecs[2]);
00369 v[2] = vec_mergeh(vecs[1], vecs[3]);
00370 v[3] = vec_mergel(vecs[1], vecs[3]);
00371
00372 sum[0] = vec_mergeh(v[0], v[2]);
00373 sum[1] = vec_mergel(v[0], v[2]);
00374 sum[2] = vec_mergeh(v[1], v[3]);
00375 sum[3] = vec_mergel(v[1], v[3]);
00376
00377
00378
00379 sum[0] = vec_add(sum[0], sum[1]);
00380
00381 sum[1] = vec_add(sum[2], sum[3]);
00382
00383 sum[0] = vec_add(sum[0], sum[1]);
00384
00385 return sum[0];
00386 }
00387
00388 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
00389 {
00390 Packet4i sum;
00391 sum = vec_sums(a, p4i_ZERO);
00392 sum = vec_sld(sum, p4i_ZERO, 12);
00393 return pfirst(sum);
00394 }
00395
00396 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
00397 {
00398 Packet4i v[4], sum[4];
00399
00400
00401
00402
00403 v[0] = vec_mergeh(vecs[0], vecs[2]);
00404 v[1] = vec_mergel(vecs[0], vecs[2]);
00405 v[2] = vec_mergeh(vecs[1], vecs[3]);
00406 v[3] = vec_mergel(vecs[1], vecs[3]);
00407
00408 sum[0] = vec_mergeh(v[0], v[2]);
00409 sum[1] = vec_mergel(v[0], v[2]);
00410 sum[2] = vec_mergeh(v[1], v[3]);
00411 sum[3] = vec_mergel(v[1], v[3]);
00412
00413
00414
00415 sum[0] = vec_add(sum[0], sum[1]);
00416
00417 sum[1] = vec_add(sum[2], sum[3]);
00418
00419 sum[0] = vec_add(sum[0], sum[1]);
00420
00421 return sum[0];
00422 }
00423
00424
00425
00426 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
00427 {
00428 Packet4f prod;
00429 prod = pmul(a, (Packet4f)vec_sld(a, a, 8));
00430 return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
00431 }
00432
00433 template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
00434 {
00435 EIGEN_ALIGN16 int aux[4];
00436 pstore(aux, a);
00437 return aux[0] * aux[1] * aux[2] * aux[3];
00438 }
00439
00440
00441 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
00442 {
00443 Packet4f b, res;
00444 b = vec_min(a, vec_sld(a, a, 8));
00445 res = vec_min(b, vec_sld(b, b, 4));
00446 return pfirst(res);
00447 }
00448
00449 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
00450 {
00451 Packet4i b, res;
00452 b = vec_min(a, vec_sld(a, a, 8));
00453 res = vec_min(b, vec_sld(b, b, 4));
00454 return pfirst(res);
00455 }
00456
00457
00458 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
00459 {
00460 Packet4f b, res;
00461 b = vec_max(a, vec_sld(a, a, 8));
00462 res = vec_max(b, vec_sld(b, b, 4));
00463 return pfirst(res);
00464 }
00465
00466 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
00467 {
00468 Packet4i b, res;
00469 b = vec_max(a, vec_sld(a, a, 8));
00470 res = vec_max(b, vec_sld(b, b, 4));
00471 return pfirst(res);
00472 }
00473
00474 template<int Offset>
00475 struct palign_impl<Offset,Packet4f>
00476 {
00477 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
00478 {
00479 if (Offset!=0)
00480 first = vec_sld(first, second, Offset*4);
00481 }
00482 };
00483
00484 template<int Offset>
00485 struct palign_impl<Offset,Packet4i>
00486 {
00487 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
00488 {
00489 if (Offset!=0)
00490 first = vec_sld(first, second, Offset*4);
00491 }
00492 };
00493
00494 }
00495
00496 }
00497
00498 #endif // EIGEN_PACKET_MATH_ALTIVEC_H