00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #ifndef PCL_SSE_H_
00040 #define PCL_SSE_H_
00041 #if defined(__SSE2__)
00042 #include <emmintrin.h>
00043
// Return-type shorthands used by every wrapper below; undefined again at the
// end of this section so they do not leak to includers.
#define RETf inline __m128
#define RETi inline __m128i

namespace pcl {

  // ---------------------------------------------------------------------
  // Set, load and store
  // ---------------------------------------------------------------------

  // Broadcast a single float to all four lanes.
  RETf sse_set( const float &x )
  {
    return _mm_set1_ps( x );
  }

  // Build a vector from four floats. The arguments are forwarded to
  // _mm_set_ps unchanged, so (per the intrinsic) w ends up in the lowest
  // lane and x in the highest.
  RETf sse_set( float x, float y, float z, float w )
  {
    return _mm_set_ps( x, y, z, w );
  }

  // Broadcast a single 32-bit integer to all four lanes.
  RETi sse_set( const int &x )
  {
    return _mm_set1_epi32( x );
  }

  // Load four floats starting at the address of x (aligned load:
  // _mm_load_ps requires a 16-byte aligned address).
  RETf sse_ld( const float &x )
  {
    return _mm_load_ps( &x );
  }

  // Load four floats starting at the address of x (unaligned load).
  RETf sse_ldu( const float &x )
  {
    return _mm_loadu_ps( &x );
  }

  // Store all four lanes of y at the address of x (aligned store);
  // returns y so calls can be chained.
  RETf sse_str( float &x, const __m128 y )
  {
    _mm_store_ps( &x, y );
    return y;
  }

  // Store only the lowest lane of y into x; returns y.
  RETf sse_str1( float &x, const __m128 y )
  {
    _mm_store_ss( &x, y );
    return y;
  }

  // Store all four lanes of y at the address of x (unaligned store);
  // returns y.
  RETf sse_stru( float &x, const __m128 y )
  {
    _mm_storeu_ps( &x, y );
    return y;
  }

  // Broadcast the scalar y and store it (aligned) into the four floats
  // starting at x; returns the broadcast vector.
  RETf sse_str( float &x, const float y )
  {
    const __m128 splat = _mm_set1_ps( y );
    _mm_store_ps( &x, splat );
    return splat;
  }

  // ---------------------------------------------------------------------
  // Arithmetic
  // ---------------------------------------------------------------------

  // Lane-wise 32-bit integer addition.
  RETi sse_add( const __m128i x, const __m128i y )
  {
    return _mm_add_epi32( x, y );
  }

  // Lane-wise float addition.
  RETf sse_add( const __m128 x, const __m128 y )
  {
    return _mm_add_ps( x, y );
  }

  // Three-operand float addition, evaluated as (x + y) + z.
  RETf sse_add( const __m128 x, const __m128 y, const __m128 z )
  {
    const __m128 xy = _mm_add_ps( x, y );
    return _mm_add_ps( xy, z );
  }

  // Four-operand float addition, evaluated as ((a + b) + c) + d.
  // NOTE(review): d is taken by reference while the other operands are
  // passed by value; presumably a workaround for compilers that restrict
  // by-value passing of aligned vector arguments -- confirm before changing.
  RETf sse_add( const __m128 a, const __m128 b, const __m128 c, const __m128 &d )
  {
    const __m128 ab  = _mm_add_ps( a, b );
    const __m128 abc = _mm_add_ps( ab, c );
    return _mm_add_ps( abc, d );
  }

  // Lane-wise float subtraction, x - y.
  RETf sse_sub( const __m128 x, const __m128 y )
  {
    return _mm_sub_ps( x, y );
  }

  // Lane-wise float multiplication.
  RETf sse_mul( const __m128 x, const __m128 y )
  {
    return _mm_mul_ps( x, y );
  }

  // Multiply every lane of x by the scalar y.
  RETf sse_mul( const __m128 x, const float y )
  {
    return _mm_mul_ps( x, _mm_set1_ps( y ) );
  }

  // Multiply every lane of y by the scalar x.
  RETf sse_mul( const float x, const __m128 y )
  {
    return _mm_mul_ps( _mm_set1_ps( x ), y );
  }

  // In-place x += y on a vector; returns the updated value.
  RETf sse_inc( __m128 &x, const __m128 y )
  {
    x = _mm_add_ps( x, y );
    return x;
  }

  // In-place x += y on four floats in memory (aligned load and store);
  // returns the sum that was written back.
  RETf sse_inc( float &x, const __m128 y )
  {
    const __m128 sum = _mm_add_ps( _mm_load_ps( &x ), y );
    _mm_store_ps( &x, sum );
    return sum;
  }

  // In-place x -= y on a vector; returns the updated value.
  RETf sse_dec( __m128 &x, const __m128 y )
  {
    x = _mm_sub_ps( x, y );
    return x;
  }

  // In-place x -= y on four floats in memory (aligned load and store);
  // returns the difference that was written back.
  RETf sse_dec( float &x, const __m128 y )
  {
    const __m128 diff = _mm_sub_ps( _mm_load_ps( &x ), y );
    _mm_store_ps( &x, diff );
    return diff;
  }

  // Lane-wise float minimum.
  RETf sse_min( const __m128 x, const __m128 y )
  {
    return _mm_min_ps( x, y );
  }

  // Lane-wise reciprocal via _mm_rcp_ps (an approximation, not exact 1/x).
  RETf sse_rcp( const __m128 x )
  {
    return _mm_rcp_ps( x );
  }

  // Lane-wise reciprocal square root via _mm_rsqrt_ps (an approximation).
  RETf sse_rcpsqrt( const __m128 x )
  {
    return _mm_rsqrt_ps( x );
  }

  // ---------------------------------------------------------------------
  // Bitwise logic
  // ---------------------------------------------------------------------

  // Bitwise AND of two float vectors.
  RETf sse_and( const __m128 x, const __m128 y )
  {
    return _mm_and_ps( x, y );
  }

  // Bitwise AND of two integer vectors.
  RETi sse_and( const __m128i x, const __m128i y )
  {
    return _mm_and_si128( x, y );
  }

  // Bitwise AND-NOT via _mm_andnot_ps, i.e. (~x) & y.
  RETf sse_andnot( const __m128 x, const __m128 y )
  {
    return _mm_andnot_ps( x, y );
  }

  // Bitwise OR of two float vectors.
  RETf sse_or( const __m128 x, const __m128 y )
  {
    return _mm_or_ps( x, y );
  }

  // Bitwise XOR of two float vectors.
  RETf sse_xor( const __m128 x, const __m128 y )
  {
    return _mm_xor_ps( x, y );
  }

  // ---------------------------------------------------------------------
  // Comparison
  // ---------------------------------------------------------------------

  // Lane-wise x > y on floats; each result lane is an all-ones or
  // all-zeros bit mask.
  RETf sse_cmpgt( const __m128 x, const __m128 y )
  {
    return _mm_cmpgt_ps( x, y );
  }

  // Lane-wise x > y on signed 32-bit integers; each result lane is an
  // all-ones or all-zeros bit mask.
  RETi sse_cmpgt( const __m128i x, const __m128i y )
  {
    return _mm_cmpgt_epi32( x, y );
  }

  // ---------------------------------------------------------------------
  // Conversion
  // ---------------------------------------------------------------------

  // Convert four 32-bit integers to floats.
  RETf sse_cvt( const __m128i x )
  {
    return _mm_cvtepi32_ps( x );
  }

  // Convert four floats to 32-bit integers with truncation
  // (_mm_cvttps_epi32).
  RETi sse_cvt( const __m128 x )
  {
    return _mm_cvttps_epi32( x );
  }

}

#undef RETf
#undef RETi
00098 #endif
00099 #endif