10 #ifndef EIGEN_PACKET_MATH_ALTIVEC_H 11 #define EIGEN_PACKET_MATH_ALTIVEC_H 17 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 18 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 21 #ifndef EIGEN_HAS_FUSE_CJMADD 22 #define EIGEN_HAS_FUSE_CJMADD 1 26 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 27 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 40 #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ 41 Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X) 43 #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ 44 Packet4i p4i_##NAME = vec_splat_s32(X) 46 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ 47 Packet4f p4f_##NAME = pset1<Packet4f>(X) 49 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ 50 Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X)) 52 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ 53 Packet4i p4i_##NAME = pset1<Packet4i>(X) 56 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) 61 static Packet16uc
p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
63 static Packet16uc
p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
70 static Packet4f
p4f_ONE = vec_ctf(p4i_ONE, 0);
// NOTE(review): body fragment; the enclosing signature and the declaration of
// `af` are not visible here (presumably pset1<Packet4f> - confirm).
// Loads one vector from `af`, then copies its element 0 into every lane.
151 Packet4f vc = vec_ld(0, af);
152 vc = vec_splat(vc, 0);
// NOTE(review): body fragment; the enclosing signature and the declaration of
// `ai` are not visible here (presumably pset1<Packet4i> - confirm).
// Loads one vector from `ai`, then copies its element 0 into every lane.
159 Packet4i vc = vec_ld(0, ai);
160 vc = vec_splat(vc, 0);
// NOTE(review): body fragment of a float-packet division (signature not
// visible; presumably pdiv<Packet4f>). The statement that assigns y_0 is not
// shown - presumably an initial reciprocal estimate of b; confirm.
218 Packet4f t, y_0, y_1, res;
// t = p4f_ONE - y_0 * b: the error of the current estimate of 1/b.
224 t = vec_nmsub(y_0, b, p4f_ONE);
// y_1 = y_0 + y_0 * t: one refinement step of the reciprocal estimate.
225 y_1 = vec_madd(y_0, t, y_0);
// res = a * y_1 + p4f_ZERO: numerator times the refined reciprocal.
227 res = vec_madd(a, y_1, p4f_ZERO);
// NOTE(review): opening of a function body whose signature and closing brace
// are not visible (presumably pdiv<Packet4i>). The assertion condition is
// always false, so reaching this code fails whenever eigen_assert is active;
// the string only serves as the failure message.
232 {
eigen_assert(
false &&
"packet integer division are not supported by AltiVec");
237 template<>
EIGEN_STRONG_INLINE Packet4f
pmadd(
const Packet4f& a,
const Packet4f& b,
const Packet4f& c) {
return vec_madd(a, b, c); }
// NOTE(review): body fragment of an unaligned float load (signature and the
// declarations of MSQ, LSQ and mask are not visible; presumably
// ploadu<Packet4f>).
// Two vector loads at byte offsets 0 and 15 from `from` fetch the data on
// either side of a possible 16-byte boundary.
268 MSQ = vec_ld(0, (
unsigned char *)from);
269 LSQ = vec_ld(15, (
unsigned char *)from);
// Permute mask derived from the address of `from` ...
270 mask = vec_lvsl(0, from);
// ... used to pick the 16 wanted bytes out of the MSQ/LSQ pair.
271 return (Packet4f) vec_perm(MSQ, LSQ, mask);
// NOTE(review): body fragment of an unaligned integer load (signature and the
// declarations of MSQ, LSQ and mask are not visible; presumably
// ploadu<Packet4i>).
// Two vector loads at byte offsets 0 and 15 from `from` fetch the data on
// either side of a possible 16-byte boundary.
280 MSQ = vec_ld(0, (
unsigned char *)from);
281 LSQ = vec_ld(15, (
unsigned char *)from);
// Permute mask derived from the address of `from` ...
282 mask = vec_lvsl(0, from);
// ... used to pick the 16 wanted bytes out of the MSQ/LSQ pair.
283 return (Packet4i) vec_perm(MSQ, LSQ, mask);
// NOTE(review): single statement from a body that is otherwise not visible
// (presumably ploaddup<Packet4f>; how `p` is loaded is not shown).
// Rearranges p with the p16uc_DUPLICATE mask: {p[0], p[0], p[1], p[1]}.
291 return vec_perm(p, p, p16uc_DUPLICATE);
// NOTE(review): single statement from a body that is otherwise not visible
// (presumably ploaddup<Packet4i>; how `p` is loaded is not shown).
// Rearranges p with the p16uc_DUPLICATE mask: {p[0], p[0], p[1], p[1]}.
298 return vec_perm(p, p, p16uc_DUPLICATE);
// NOTE(review): body fragment of an unaligned store of `from` to `to`
// (signature not visible; presumably pstoreu<float>). It is a
// read-modify-write of the two vectors loaded at byte offsets 0 and 15.
309 Packet16uc MSQ, LSQ, edges;
310 Packet16uc edgeAlign, align;
// Read what is currently stored around the destination.
312 MSQ = vec_ld(0, (
unsigned char *)to);
313 LSQ = vec_ld(15, (
unsigned char *)to);
// Collect into `edges` the existing bytes that must survive the store.
314 edgeAlign = vec_lvsl(0, to);
315 edges=vec_perm(LSQ,MSQ,edgeAlign);
// Blend `from` with the preserved bytes, once per destination vector.
316 align = vec_lvsr( 0, to );
317 MSQ = vec_perm(edges,(Packet16uc)from,align);
318 LSQ = vec_perm((Packet16uc)from,edges,align);
// The offset-15 store is issued before the offset-0 store.
// NOTE(review): the order appears deliberate - if both stores hit the same
// 16-byte block, the offset-0 store carrying `from` must land last; confirm.
319 vec_st( LSQ, 15, (
unsigned char *)to );
320 vec_st( MSQ, 0, (
unsigned char *)to );
// NOTE(review): body fragment of an unaligned store of `from` to `to`
// (signature not visible; presumably pstoreu<int>). It is a
// read-modify-write of the two vectors loaded at byte offsets 0 and 15.
327 Packet16uc MSQ, LSQ, edges;
328 Packet16uc edgeAlign, align;
// Read what is currently stored around the destination.
330 MSQ = vec_ld(0, (
unsigned char *)to);
331 LSQ = vec_ld(15, (
unsigned char *)to);
// Collect into `edges` the existing bytes that must survive the store.
332 edgeAlign = vec_lvsl(0, to);
333 edges=vec_perm(LSQ, MSQ, edgeAlign);
// Blend `from` with the preserved bytes, once per destination vector.
334 align = vec_lvsr( 0, to );
335 MSQ = vec_perm(edges, (Packet16uc) from, align);
336 LSQ = vec_perm((Packet16uc) from, edges, align);
// The offset-15 store is issued before the offset-0 store.
// NOTE(review): the order appears deliberate - if both stores hit the same
// 16-byte block, the offset-0 store carrying `from` must land last; confirm.
337 vec_st( LSQ, 15, (
unsigned char *)to );
338 vec_st( MSQ, 0, (
unsigned char *)to );
// NOTE(review): tail of a horizontal float sum (signature and the earlier
// statements that initialise `sum` are not visible; presumably
// predux<Packet4f>).
// Rotate `sum` by 4 bytes (one float) and add it to itself.
358 b = (
Packet4f) vec_sld(sum, sum, 4);
359 sum = vec_add(sum, b);
// NOTE(review): body fragment (signature and return statement not visible;
// presumably preduxp<Packet4f>). Reads the four packets vecs[0..3].
365 Packet4f
v[4], sum[4];
// First merge round: interleave vecs[0] with vecs[2] and vecs[1] with vecs[3].
370 v[0] = vec_mergeh(vecs[0], vecs[2]);
371 v[1] = vec_mergel(vecs[0], vecs[2]);
372 v[2] = vec_mergeh(vecs[1], vecs[3]);
373 v[3] = vec_mergel(vecs[1], vecs[3]);
// Second merge round completes a 4x4 transpose: sum[k] now holds element k of
// vecs[0], vecs[1], vecs[2] and vecs[3], in that order.
375 sum[0] = vec_mergeh(v[0], v[2]);
376 sum[1] = vec_mergel(v[0], v[2]);
377 sum[2] = vec_mergeh(v[1], v[3]);
378 sum[3] = vec_mergel(v[1], v[3]);
// Add the four transposed packets pairwise; afterwards lane i of sum[0] is the
// sum of all four elements of vecs[i].
382 sum[0] = vec_add(sum[0], sum[1]);
384 sum[1] = vec_add(sum[2], sum[3]);
386 sum[0] = vec_add(sum[0], sum[1]);
// NOTE(review): body fragment of a horizontal integer sum (signature, the
// declaration of `sum` and the return are not visible; presumably
// predux<Packet4i>).
// vec_sums adds up the elements of `a` (a saturating sum per the AltiVec
// documentation) and leaves the total in the last element.
394 sum = vec_sums(a, p4i_ZERO);
// Shift left by 12 bytes so that the total moves to element 0.
395 sum = vec_sld(sum, p4i_ZERO, 12);
// NOTE(review): body fragment (signature and return statement not visible;
// presumably preduxp<Packet4i>). Reads the four packets vecs[0..3].
401 Packet4i
v[4], sum[4];
// First merge round: interleave vecs[0] with vecs[2] and vecs[1] with vecs[3].
406 v[0] = vec_mergeh(vecs[0], vecs[2]);
407 v[1] = vec_mergel(vecs[0], vecs[2]);
408 v[2] = vec_mergeh(vecs[1], vecs[3]);
409 v[3] = vec_mergel(vecs[1], vecs[3]);
// Second merge round completes a 4x4 transpose: sum[k] now holds element k of
// vecs[0], vecs[1], vecs[2] and vecs[3], in that order.
411 sum[0] = vec_mergeh(v[0], v[2]);
412 sum[1] = vec_mergel(v[0], v[2]);
413 sum[2] = vec_mergeh(v[1], v[3]);
414 sum[3] = vec_mergel(v[1], v[3]);
// Add the four transposed packets pairwise; afterwards lane i of sum[0] is the
// sum of all four elements of vecs[i].
418 sum[0] = vec_add(sum[0], sum[1]);
420 sum[1] = vec_add(sum[2], sum[3]);
422 sum[0] = vec_add(sum[0], sum[1]);
// NOTE(review): body fragment of a horizontal float product (signature and
// the declaration of `prod` are not visible; presumably predux_mul<Packet4f>).
// Multiply `a` by itself rotated by 8 bytes: lanes 0 and 1 hold a[0]*a[2] and
// a[1]*a[3].
432 prod =
pmul(a, (Packet4f)vec_sld(a, a, 8));
// Multiply by a 4-byte rotation so lane 0 holds the product of all four
// elements, then return that lane.
433 return pfirst(
pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
// NOTE(review): single statement from a body that is otherwise not visible
// (presumably predux_mul<Packet4i>; how `aux` is filled is not shown).
// Scalar product of the four entries of `aux`.
440 return aux[0] * aux[1] * aux[2] * aux[3];
// NOTE(review): body fragment of a horizontal minimum (signature,
// declarations and return not visible; presumably predux_min<Packet4f>).
// Compare `a` with itself rotated by 8 bytes, then the result with itself
// rotated by 4 bytes; lane 0 of `res` is then the minimum of all four elements.
447 b = vec_min(a, vec_sld(a, a, 8));
448 res = vec_min(b, vec_sld(b, b, 4));
// NOTE(review): body fragment of a horizontal minimum (signature,
// declarations and return not visible; presumably predux_min<Packet4i>).
// Compare `a` with itself rotated by 8 bytes, then the result with itself
// rotated by 4 bytes; lane 0 of `res` is then the minimum of all four elements.
455 b = vec_min(a, vec_sld(a, a, 8));
456 res = vec_min(b, vec_sld(b, b, 4));
// NOTE(review): body fragment of a horizontal maximum (signature,
// declarations and return not visible; presumably predux_max<Packet4f>).
// Compare `a` with itself rotated by 8 bytes, then the result with itself
// rotated by 4 bytes; lane 0 of `res` is then the maximum of all four elements.
464 b = vec_max(a, vec_sld(a, a, 8));
465 res = vec_max(b, vec_sld(b, b, 4));
// NOTE(review): body fragment of a horizontal maximum (signature,
// declarations and return not visible; presumably predux_max<Packet4i>).
// Compare `a` with itself rotated by 8 bytes, then the result with itself
// rotated by 4 bytes; lane 0 of `res` is then the maximum of all four elements.
472 b = vec_max(a, vec_sld(a, a, 8));
473 res = vec_max(b, vec_sld(b, b, 4));
// NOTE(review): single statement from a body whose signature is not visible
// (presumably the float-packet `run` helper; `Offset` is declared elsewhere).
// Replaces `first` with 16 bytes taken from the concatenation first:second,
// starting Offset*4 bytes in (i.e. Offset 4-byte elements).
483 first = vec_sld(first, second, Offset*4);
// NOTE(review): single statement from a body whose signature is not visible
// (presumably the integer-packet `run` helper; `Offset` is declared elsewhere).
// Replaces `first` with 16 bytes taken from the concatenation first:second,
// starting Offset*4 bytes in (i.e. Offset 4-byte elements).
493 first = vec_sld(first, second, Offset*4);
501 #endif // EIGEN_PACKET_MATH_ALTIVEC_H
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0)
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
__vector unsigned char Packet16uc
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
#define EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
#define EIGEN_DEBUG_ALIGNED_STORE
Iterative scaling algorithm to equilibrate row and column norms in matrices
static EIGEN_STRONG_INLINE void run(Packet4i &first, const Packet4i &second)
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
static Packet16uc p16uc_FORWARD
static EIGEN_STRONG_INLINE void run(Packet4f &first, const Packet4f &second)
#define EIGEN_DEBUG_UNALIGNED_STORE
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
static Packet16uc p16uc_REVERSE
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &, const Packet4i &)
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
__vector __bool int Packet4bi
#define EIGEN_DEBUG_ALIGNED_LOAD
void pstore(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet4i plset< int >(const int &a)
EIGEN_STRONG_INLINE Packet4f plset< float >(const float &a)
static Packet4i p4i_COUNTDOWN
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0)
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
unpacket_traits< Packet >::type pfirst(const Packet &a)
EIGEN_STRONG_INLINE Packet4i preduxp< Packet4i >(const Packet4i *vecs)
__vector short int Packet8i
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
static Packet4f p4f_ZERO_
#define DST_CTRL(size, count, stride)
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
static Packet4f p4f_COUNTDOWN
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
__vector unsigned int Packet4ui
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Packet pmul(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4f preduxp< Packet4f >(const Packet4f *vecs)
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Packet padd(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
static Packet16uc p16uc_DUPLICATE
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)