10 #ifndef EIGEN_PACKET_MATH_ALTIVEC_H    11 #define EIGEN_PACKET_MATH_ALTIVEC_H    17 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD    18 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4    21 #ifndef EIGEN_HAS_FUSE_CJMADD    22 #define EIGEN_HAS_FUSE_CJMADD 1    26 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS    27 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16    40 #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \    41   Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)    43 #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \    44   Packet4i p4i_##NAME = vec_splat_s32(X)    46 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \    47   Packet4f p4f_##NAME = pset1<Packet4f>(X)    49 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \    50   Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))    52 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \    53   Packet4i p4i_##NAME = pset1<Packet4i>(X)    56 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))    61 static Packet16uc 
p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
    63 static Packet16uc 
p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
    70 static Packet4f 
p4f_ONE = vec_ctf(p4i_ONE, 0);
   // Two broadcast fragments (float and int). Each loads a full vector from a
   // buffer (`af` / `ai`, declared outside this excerpt) and then replicates
   // element 0 of that vector into every lane with vec_splat.
   // NOTE(review): the enclosing signatures are not part of this excerpt;
   // presumably the pset1<Packet4f> / pset1<Packet4i> specialisations -- confirm.
   151   Packet4f vc = vec_ld(0, af);
   152   vc = vec_splat(vc, 0);
   // Same pattern for the integer packet.
   159   Packet4i vc = vec_ld(0, ai);
   160   vc = vec_splat(vc, 0);
   // Refines an estimate y_0 once and multiplies the result by `a`.
   // NOTE(review): the statement that initialises y_0 is not part of this
   // excerpt; presumably a reciprocal estimate of b, which would make the
   // sequence below one Newton-Raphson step of a / b -- confirm.
   218   Packet4f t, y_0, y_1, res;
   // t = p4f_ONE - y_0 * b   (vec_nmsub yields the negated multiply-subtract)
   224   t   = vec_nmsub(y_0, b, p4f_ONE);
   // y_1 = y_0 + y_0 * t
   225   y_1 = vec_madd(y_0, t, y_0);
   // res = a * y_1 + p4f_ZERO
   227   res = vec_madd(a, y_1, p4f_ZERO);
   // Unconditional assertion failure: the asserted condition is the constant
   // `false`; the string literal is and-ed in only to carry the diagnostic text.
   232 { 
eigen_assert(
false && 
"packet integer division are not supported by AltiVec");
   237 template<> 
EIGEN_STRONG_INLINE Packet4f 
pmadd(
const Packet4f& a, 
const Packet4f& b, 
const Packet4f& c) { 
return vec_madd(a, b, c); }
   // Unaligned 16-byte load built from two aligned loads and one permute.
   // NOTE(review): the enclosing signature and the declarations of MSQ, LSQ
   // and mask are not part of this excerpt; presumably ploadu<Packet4f>.
   //
   // MSQ: aligned quadword that contains the byte at `from`.
   268   MSQ = vec_ld(0, (
unsigned char *)from);          
   // LSQ: aligned quadword that contains the byte at `from` + 15. Using 15
   // rather than 16 keeps this load inside the same quadword as MSQ when
   // `from` is already 16-byte aligned.
   269   LSQ = vec_ld(15, (
unsigned char *)from);         
   // Permute control derived from the alignment of `from`.
   270   mask = vec_lvsl(0, from);                        
   // Select the 16 requested bytes out of the MSQ:LSQ pair.
   271   return (Packet4f) vec_perm(MSQ, LSQ, mask);           
   // Integer counterpart of the unaligned 16-byte load: two aligned loads
   // followed by one permute.
   // NOTE(review): the enclosing signature and the declarations of MSQ, LSQ
   // and mask are not part of this excerpt; presumably ploadu<Packet4i>.
   //
   // MSQ: aligned quadword that contains the byte at `from`.
   280   MSQ = vec_ld(0, (
unsigned char *)from);          
   // LSQ: aligned quadword that contains the byte at `from` + 15 (the same
   // quadword as MSQ when `from` is already 16-byte aligned).
   281   LSQ = vec_ld(15, (
unsigned char *)from);         
   // Permute control derived from the alignment of `from`.
   282   mask = vec_lvsl(0, from);                        
   // Select the 16 requested bytes out of the MSQ:LSQ pair.
   283   return (Packet4i) vec_perm(MSQ, LSQ, mask);    
   // Two identical return statements from separate functions. Each permutes
   // `p` with itself through p16uc_DUPLICATE, so the result holds bytes 0-3
   // of p twice followed by bytes 4-7 of p twice.
   // NOTE(review): `p` and the enclosing signatures are not part of this
   // excerpt; presumably the two ploaddup specialisations -- confirm.
   291   return vec_perm(p, p, p16uc_DUPLICATE);
   298   return vec_perm(p, p, p16uc_DUPLICATE);
   // Unaligned 16-byte store done as a read-modify-write of the one or two
   // aligned quadwords that overlap the destination.
   // NOTE(review): the enclosing signature is not part of this excerpt;
   // presumably one of the pstoreu specialisations. Bytes next to the
   // destination are read and written back, so a concurrent writer to those
   // neighbouring bytes could be overwritten -- confirm callers tolerate this.
   309   Packet16uc MSQ, LSQ, edges;
   310   Packet16uc edgeAlign, align;
   // Current contents of the quadword holding the first destination byte.
   312   MSQ = vec_ld(0, (
unsigned char *)to);                     
   // Current contents of the quadword holding the last destination byte.
   313   LSQ = vec_ld(15, (
unsigned char *)to);                    
   // Permute control used to pull out the bytes that must be preserved.
   314   edgeAlign = vec_lvsl(0, to);                              
   // Bytes outside the destination range, gathered from LSQ:MSQ.
   315   edges=vec_perm(LSQ,MSQ,edgeAlign);                        
   // Permute control that shifts the data to the alignment of `to`.
   316   align = vec_lvsr( 0, to );                                
   // New first quadword: preserved leading bytes followed by the start of `from`.
   317   MSQ = vec_perm(edges,(Packet16uc)from,align);             
   // New last quadword: the rest of `from` followed by preserved trailing bytes.
   318   LSQ = vec_perm((Packet16uc)from,edges,align);             
   // LSQ is written first: when `to` is 16-byte aligned both stores hit the
   // same quadword, and the MSQ store issued last is the one carrying `from`.
   319   vec_st( LSQ, 15, (
unsigned char *)to );                   
   320   vec_st( MSQ, 0, (
unsigned char *)to );                    
   // Unaligned 16-byte store, second occurrence of the same sequence:
   // read-modify-write of the one or two aligned quadwords that overlap the
   // destination.
   // NOTE(review): the enclosing signature is not part of this excerpt;
   // presumably the other pstoreu specialisation. Neighbouring bytes are read
   // and written back, so concurrent writers to them could be overwritten.
   327   Packet16uc MSQ, LSQ, edges;
   328   Packet16uc edgeAlign, align;
   // Current contents of the quadword holding the first destination byte.
   330   MSQ = vec_ld(0, (
unsigned char *)to);                     
   // Current contents of the quadword holding the last destination byte.
   331   LSQ = vec_ld(15, (
unsigned char *)to);                    
   // Permute control used to pull out the bytes that must be preserved.
   332   edgeAlign = vec_lvsl(0, to);                              
   // Bytes outside the destination range, gathered from LSQ:MSQ.
   333   edges=vec_perm(LSQ, MSQ, edgeAlign);                      
   // Permute control that shifts the data to the alignment of `to`.
   334   align = vec_lvsr( 0, to );                                
   // New first quadword: preserved leading bytes followed by the start of `from`.
   335   MSQ = vec_perm(edges, (Packet16uc) from, align);          
   // New last quadword: the rest of `from` followed by preserved trailing bytes.
   336   LSQ = vec_perm((Packet16uc) from, edges, align);          
   // LSQ is written first: when `to` is 16-byte aligned both stores hit the
   // same quadword, and the MSQ store issued last is the one carrying `from`.
   337   vec_st( LSQ, 15, (
unsigned char *)to );                   
   338   vec_st( MSQ, 0, (
unsigned char *)to );                    
   // One step of a horizontal sum: `b` is `sum` rotated left by 4 bytes (one
   // float lane), then the two are added lane by lane.
   // NOTE(review): the statements that initialise `sum` and use the result are
   // not part of this excerpt.
   358   b   = (
Packet4f) vec_sld(sum, sum, 4);
   359   sum = vec_add(sum, b);
   // Transposes the four input packets vecs[0..3] with two rounds of
   // merge-high / merge-low, then adds the transposed rows, so that lane j of
   // sum[0] ends up holding the sum of the four elements of vecs[j].
   // NOTE(review): the enclosing signature and the return statement are not
   // part of this excerpt; presumably preduxp<Packet4f>.
   365   Packet4f v[4], sum[4];
   // Round 1: interleave vecs[0] with vecs[2] and vecs[1] with vecs[3].
   370   v[0] = vec_mergeh(vecs[0], vecs[2]);
   371   v[1] = vec_mergel(vecs[0], vecs[2]);
   372   v[2] = vec_mergeh(vecs[1], vecs[3]);
   373   v[3] = vec_mergel(vecs[1], vecs[3]);
   // Round 2: sum[k] now holds element k of vecs[0], vecs[1], vecs[2], vecs[3].
   375   sum[0] = vec_mergeh(v[0], v[2]);
   376   sum[1] = vec_mergel(v[0], v[2]);
   377   sum[2] = vec_mergeh(v[1], v[3]);
   378   sum[3] = vec_mergel(v[1], v[3]);
   // Pairwise additions: (row0 + row1) + (row2 + row3).
   382   sum[0] = vec_add(sum[0], sum[1]);
   384   sum[1] = vec_add(sum[2], sum[3]);
   386   sum[0] = vec_add(sum[0], sum[1]);
   // Horizontal integer sum. vec_sums adds the four elements of `a` (plus the
   // last element of p4i_ZERO) and leaves the total in the last element of the
   // result; per the AltiVec definition this addition saturates.
   394   sum = vec_sums(a, p4i_ZERO);
   // Shift left by 12 bytes so the total moves from the last element to element 0.
   395   sum = vec_sld(sum, p4i_ZERO, 12);
   // Integer variant: transposes the four input packets vecs[0..3] with two
   // rounds of merge-high / merge-low, then adds the transposed rows, so that
   // lane j of sum[0] ends up holding the sum of the four elements of vecs[j].
   // NOTE(review): the enclosing signature and the return statement are not
   // part of this excerpt; presumably preduxp<Packet4i>.
   401   Packet4i v[4], sum[4];
   // Round 1: interleave vecs[0] with vecs[2] and vecs[1] with vecs[3].
   406   v[0] = vec_mergeh(vecs[0], vecs[2]);
   407   v[1] = vec_mergel(vecs[0], vecs[2]);
   408   v[2] = vec_mergeh(vecs[1], vecs[3]);
   409   v[3] = vec_mergel(vecs[1], vecs[3]);
   // Round 2: sum[k] now holds element k of vecs[0], vecs[1], vecs[2], vecs[3].
   411   sum[0] = vec_mergeh(v[0], v[2]);
   412   sum[1] = vec_mergel(v[0], v[2]);
   413   sum[2] = vec_mergeh(v[1], v[3]);
   414   sum[3] = vec_mergel(v[1], v[3]);
   // Pairwise additions: (row0 + row1) + (row2 + row3).
   418   sum[0] = vec_add(sum[0], sum[1]);
   420   sum[1] = vec_add(sum[2], sum[3]);
   422   sum[0] = vec_add(sum[0], sum[1]);
   // Horizontal product of the four lanes of `a` in two folding steps.
   // Step 1: multiply `a` by itself rotated by 8 bytes (two lanes), giving
   // a0*a2 and a1*a3 in the first two lanes.
   432   prod = 
pmul(a, (Packet4f)vec_sld(a, a, 8));
   // Step 2: multiply by the 4-byte rotation so the first lane holds the
   // product of all four elements, then extract it with pfirst.
   433   return pfirst(
pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
   // Scalar product of the four entries of `aux`.
   // NOTE(review): `aux` is filled outside this excerpt; presumably the packet
   // stored to an aligned array (predux_mul<Packet4i>) -- confirm.
   440   return aux[0] * aux[1] * aux[2] * aux[3];
   // Four horizontal min/max reductions sharing one pattern: combine `a` with
   // itself rotated by 8 bytes (two lanes), then combine that result with
   // itself rotated by 4 bytes (one lane). Afterwards every lane of `res`
   // holds the minimum (vec_min) or maximum (vec_max) of the four input lanes.
   // NOTE(review): the enclosing signatures and return statements are not part
   // of this excerpt; presumably predux_min / predux_max for both packet types.
   447   b = vec_min(a, vec_sld(a, a, 8));
   448   res = vec_min(b, vec_sld(b, b, 4));
   // Second minimum reduction (same steps, other packet type).
   455   b = vec_min(a, vec_sld(a, a, 8));
   456   res = vec_min(b, vec_sld(b, b, 4));
   // Maximum reduction.
   464   b = vec_max(a, vec_sld(a, a, 8));
   465   res = vec_max(b, vec_sld(b, b, 4));
   // Second maximum reduction (same steps, other packet type).
   472   b = vec_max(a, vec_sld(a, a, 8));
   473   res = vec_max(b, vec_sld(b, b, 4));
   // Two identical statements from separate functions. Each replaces `first`
   // with 16 bytes taken from the concatenation first:second, starting
   // Offset*4 bytes in, i.e. shifted by Offset 4-byte elements.
   // NOTE(review): `Offset` and any guarding condition are not part of this
   // excerpt; presumably a compile-time template parameter -- confirm.
   483       first = vec_sld(first, second, Offset*4);
   493       first = vec_sld(first, second, Offset*4);
   501 #endif // EIGEN_PACKET_MATH_ALTIVEC_H 
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0)
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
__vector unsigned char Packet16uc
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
#define EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
#define EIGEN_DEBUG_ALIGNED_STORE
static EIGEN_STRONG_INLINE void run(Packet4i &first, const Packet4i &second)
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
static Packet16uc p16uc_FORWARD
static EIGEN_STRONG_INLINE void run(Packet4f &first, const Packet4f &second)
#define EIGEN_DEBUG_UNALIGNED_STORE
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
static Packet16uc p16uc_REVERSE
EIGEN_STRONG_INLINE Packet4i pdiv< Packet4i >(const Packet4i &, const Packet4i &)
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
__vector __bool int Packet4bi
#define EIGEN_DEBUG_ALIGNED_LOAD
void pstore(Scalar *to, const Packet &from)
EIGEN_STRONG_INLINE Packet4i plset< int >(const int &a)
EIGEN_STRONG_INLINE Packet4f plset< float >(const float &a)
static Packet4i p4i_COUNTDOWN
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0)
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
unpacket_traits< Packet >::type pfirst(const Packet &a)
EIGEN_STRONG_INLINE Packet4i preduxp< Packet4i >(const Packet4i *vecs)
__vector short int Packet8i
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
TFSIMD_FORCE_INLINE const tfScalar & x() const 
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
static Packet4f p4f_ZERO_
#define DST_CTRL(size, count, stride)
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
static Packet4f p4f_COUNTDOWN
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
__vector unsigned int Packet4ui
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Packet pmul(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4f preduxp< Packet4f >(const Packet4f *vecs)
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Packet padd(const Packet &a, const Packet &b)
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
static Packet16uc p16uc_DUPLICATE
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)