00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 00023 00024 00025 00026 00027 00028 00029 #pragma once 00030 00031 #include "setup.hpp" 00032 00033 #if(!(GLM_ARCH & GLM_ARCH_SSE2)) 00034 # error "SSE2 instructions not supported or enabled" 00035 #else 00036 00037 namespace glm{ 00038 namespace detail 00039 { 00040 /* 00041 GLM_FUNC_QUALIFIER __m128 sse_rsqrt_nr_ss(__m128 const x) 00042 { 00043 __m128 recip = _mm_rsqrt_ss( x ); // "estimate" opcode 00044 const static __m128 three = { 3, 3, 3, 3 }; // aligned consts for fast load 00045 const static __m128 half = { 0.5,0.5,0.5,0.5 }; 00046 __m128 halfrecip = _mm_mul_ss( half, recip ); 00047 __m128 threeminus_xrr = _mm_sub_ss( three, _mm_mul_ss( x, _mm_mul_ss ( recip, recip ) ) ); 00048 return _mm_mul_ss( halfrecip, threeminus_xrr ); 00049 } 00050 00051 GLM_FUNC_QUALIFIER __m128 sse_normalize_fast_ps( float * RESTRICT vOut, float * RESTRICT vIn ) 00052 { 00053 __m128 x = _mm_load_ss(&vIn[0]); 00054 __m128 y = _mm_load_ss(&vIn[1]); 00055 __m128 z = _mm_load_ss(&vIn[2]); 00056 00057 const __m128 l = // compute x*x + y*y + z*z 00058 _mm_add_ss( 00059 _mm_add_ss( _mm_mul_ss(x,x), 00060 _mm_mul_ss(y,y) 00061 ), 00062 _mm_mul_ss( z, z ) 00063 ); 00064 00065 00066 const __m128 rsqt = _mm_rsqrt_nr_ss( l ); 00067 _mm_store_ss( &vOut[0] , _mm_mul_ss( rsqt, x ) ); 00068 _mm_store_ss( &vOut[1] , _mm_mul_ss( rsqt, y ) ); 00069 _mm_store_ss( &vOut[2] , _mm_mul_ss( rsqt, z ) ); 00070 00071 return _mm_mul_ss( l , rsqt ); 00072 } 00073 */ 00074 }//namespace detail 00075 }//namespace glm 00076 00077 #endif//GLM_ARCH