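// CUDA_MATH_H: helper functions and operators for CUDA built-in vector types (int2, float3/float4, double3/double4).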
00001 #ifndef CUDA_MATH_H 00002 #define CUDA_MATH_H 00003 00004 #include <stdio.h> 00005 00006 #include "parallel_common.h" 00007 00008 template <typename T> struct vec3 { typedef float Type; typedef float* PtrType; }; // dummy 00009 template <> struct vec3<float> { typedef float3 Type; typedef float3* PtrType; }; 00010 template <> struct vec3<double> { typedef double3 Type; typedef double3* PtrType; }; 00011 00012 template <typename T> struct vec4 { typedef float Type; typedef float* PtrType; }; // dummy 00013 template <> struct vec4<float> { typedef float4 Type; typedef float4* PtrType; }; 00014 template <> struct vec4<double> { typedef double4 Type; typedef double4* PtrType; }; 00015 00016 template <typename T> 00017 inline dxDevice T readAndReplace(T* buffer, const T& element) { 00018 T value = *buffer; 00019 *buffer = element; 00020 return value; 00021 } 00022 00023 inline dxHost dxDevice void add_assign_volatile(volatile float3& a, float3& b, volatile float3& c) { 00024 a.x = b.x = b.x + c.x; 00025 a.y = b.y = b.y + c.y; 00026 a.z = b.z = b.z + c.z; 00027 } 00028 inline dxHost dxDevice void add_assign_volatile(volatile double3& a, double3& b, volatile double3& c) { 00029 a.x = b.x = b.x + c.x; 00030 a.y = b.y = b.y + c.y; 00031 a.z = b.z = b.z + c.z; 00032 } 00033 00034 inline dxHost dxDevice void add_assign_volatile(volatile float4& a, float4& b, volatile float4& c) { 00035 a.x = b.x = b.x + c.x; 00036 a.y = b.y = b.y + c.y; 00037 a.z = b.z = b.z + c.z; 00038 } 00039 inline dxHost dxDevice void add_assign_volatile(volatile double4& a, double4& b, volatile double4& c) { 00040 a.x = b.x = b.x + c.x; 00041 a.y = b.y = b.y + c.y; 00042 a.z = b.z = b.z + c.z; 00043 } 00044 00045 inline dxHost dxDevice void assign_volatile(volatile float3& a, float3& b) { 00046 a.x = b.x; a.y = b.y; a.z = b.z; 00047 } 00048 inline dxHost dxDevice void assign_volatile(volatile double3& a, double3& b) { 00049 a.x = b.x; a.y = b.y; a.z = b.z; 00050 } 00051 00052 inline dxHost 
dxDevice void make_zero(float3& a) { 00053 a.x = a.y = a.z = 0.0f; 00054 } 00055 inline dxHost dxDevice void make_zero(double3& a) { 00056 a.x = a.y = a.z = 0.0; 00057 } 00058 inline dxHost dxDevice void make_zero(float4& a) { 00059 a.x = a.y = a.z = a.w = 0.0f; 00060 } 00061 inline dxHost dxDevice void make_zero(double4& a) { 00062 a.x = a.y = a.z = a.w = 0.0; 00063 } 00064 00065 #ifndef __CUDACC__ 00066 #include <math.h> 00067 00068 inline float fminf(float a, float b) 00069 { 00070 return a < b ? a : b; 00071 } 00072 00073 inline float fmaxf(float a, float b) 00074 { 00075 return a < b ? a : b; 00076 } 00077 00078 inline int max(int a, int b) 00079 { 00080 return a > b ? a : b; 00081 } 00082 00083 inline int min(int a, int b) 00084 { 00085 return a < b ? a : b; 00086 } 00087 00088 #else 00089 00090 #ifdef CUDA_ATOMICSUPPORT 00091 template <> 00092 dxDevice inline float readAndReplace<float>(float* buffer, const float& element) { 00093 return atomicExch(buffer, element); 00094 } 00095 #endif 00096 00097 #endif 00098 00099 // float functions 00101 00102 // clamp 00103 inline dxDevice dxHost float clamp(float f, float a, float b) 00104 { 00105 return fmaxf(a, fminf(f, b)); 00106 } 00107 00108 // clamp 00109 inline dxDevice dxHost double clamp(double f, double a, double b) 00110 { 00111 return fmax(a, fmin(f, b)); 00112 } 00113 00114 // int2 functions 00116 00117 // negate 00118 inline dxHost dxDevice int2 operator-(int2 &a) 00119 { 00120 return make_int2(-a.x, -a.y); 00121 } 00122 00123 // addition 00124 inline dxHost dxDevice int2 operator+(int2 a, int2 b) 00125 { 00126 return make_int2(a.x + b.x, a.y + b.y); 00127 } 00128 inline dxHost dxDevice void operator+=(int2 &a, int2 b) 00129 { 00130 a.x += b.x; a.y += b.y; 00131 } 00132 00133 // subtract 00134 inline dxHost dxDevice int2 operator-(int2 a, int2 b) 00135 { 00136 return make_int2(a.x - b.x, a.y - b.y); 00137 } 00138 inline dxHost dxDevice void operator-=(int2 &a, int2 b) 00139 { 00140 a.x -= b.x; a.y -= b.y; 
00141 } 00142 00143 // multiply 00144 inline dxHost dxDevice int2 operator*(int2 a, int2 b) 00145 { 00146 return make_int2(a.x * b.x, a.y * b.y); 00147 } 00148 inline dxHost dxDevice int2 operator*(int2 a, int s) 00149 { 00150 return make_int2(a.x * s, a.y * s); 00151 } 00152 inline dxHost dxDevice int2 operator*(int s, int2 a) 00153 { 00154 return make_int2(a.x * s, a.y * s); 00155 } 00156 inline dxHost dxDevice void operator*=(int2 &a, int s) 00157 { 00158 a.x *= s; a.y *= s; 00159 } 00160 00161 // float3 functions 00163 00164 // additional constructors 00165 inline dxHost dxDevice float3 make_float3(float s) 00166 { 00167 return make_float3(s, s, s); 00168 } 00169 inline dxHost dxDevice float3 make_float3(float4 a) 00170 { 00171 return make_float3(a.x, a.y, a.z); // discards w 00172 } 00173 inline dxHost dxDevice float3 make_float3(int3 a) 00174 { 00175 return make_float3(float(a.x), float(a.y), float(a.z)); 00176 } 00177 00178 inline dxHost dxDevice double3 make_double3(double s) 00179 { 00180 return make_double3(s, s, s); 00181 } 00182 00183 inline dxHost dxDevice double3 make_double3(double4 a) 00184 { 00185 return make_double3(a.x, a.y, a.z); // discards w 00186 } 00187 inline dxHost dxDevice double3 make_double3(int3 a) 00188 { 00189 return make_double3(double(a.x), double(a.y), double(a.z)); 00190 } 00191 00192 // negate 00193 inline dxHost dxDevice float3 operator-(float3 &a) 00194 { 00195 return make_float3(-a.x, -a.y, -a.z); 00196 } 00197 00198 // min 00199 static __inline__ dxHost dxDevice float3 fminf(float3 a, float3 b) 00200 { 00201 return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); 00202 } 00203 00204 // max 00205 static __inline__ dxHost dxDevice float3 fmaxf(float3 a, float3 b) 00206 { 00207 return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); 00208 } 00209 00210 // addition 00211 inline dxHost dxDevice float3 operator+(float3 a, float3 b) 00212 { 00213 return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); 00214 } 
00215 inline dxHost dxDevice double3 operator+(double3 a, double3 b) 00216 { 00217 return make_double3(a.x + b.x, a.y + b.y, a.z + b.z); 00218 } 00219 inline dxHost dxDevice float3 operator+(float3 a, float b) 00220 { 00221 return make_float3(a.x + b, a.y + b, a.z + b); 00222 } 00223 inline dxHost dxDevice double3 operator+(double3 a, double b) 00224 { 00225 return make_double3(a.x + b, a.y + b, a.z + b); 00226 } 00227 inline dxHost dxDevice void operator+=(float3 &a, float3 b) 00228 { 00229 a.x += b.x; a.y += b.y; a.z += b.z; 00230 } 00231 inline dxHost dxDevice void operator+=(double3 &a, double3 b) 00232 { 00233 a.x += b.x; a.y += b.y; a.z += b.z; 00234 } 00235 00236 // subtract 00237 inline dxHost dxDevice float3 operator-(float3 a, float3 b) 00238 { 00239 return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); 00240 } 00241 inline dxHost dxDevice float3 operator-(float3 a, float b) 00242 { 00243 return make_float3(a.x - b, a.y - b, a.z - b); 00244 } 00245 inline dxHost dxDevice void operator-=(float3 &a, float3 b) 00246 { 00247 a.x -= b.x; a.y -= b.y; a.z -= b.z; 00248 } 00249 00250 // multiply 00251 inline dxHost dxDevice float3 operator*(float3 a, float3 b) 00252 { 00253 return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); 00254 } 00255 inline dxHost dxDevice float3 operator*(float3 a, float s) 00256 { 00257 return make_float3(a.x * s, a.y * s, a.z * s); 00258 } 00259 inline dxHost dxDevice float3 operator*(float s, float3 a) 00260 { 00261 return make_float3(a.x * s, a.y * s, a.z * s); 00262 } 00263 inline dxHost dxDevice void operator*=(float3 &a, float s) 00264 { 00265 a.x *= s; a.y *= s; a.z *= s; 00266 } 00267 inline dxHost dxDevice void operator*=(double3 &a, double s) 00268 { 00269 a.x *= s; a.y *= s; a.z *= s; 00270 } 00271 00272 // divide 00273 inline dxHost dxDevice float3 operator/(float3 a, float3 b) 00274 { 00275 return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); 00276 } 00277 inline dxHost dxDevice float3 operator/(float3 a, float s) 00278 { 
00279 float inv = 1.0f / s; 00280 return a * inv; 00281 } 00282 inline dxHost dxDevice float3 operator/(float s, float3 a) 00283 { 00284 float inv = 1.0f / s; 00285 return a * inv; 00286 } 00287 inline dxHost dxDevice void operator/=(float3 &a, float s) 00288 { 00289 float inv = 1.0f / s; 00290 a *= inv; 00291 } 00292 00293 // clamp 00294 inline dxDevice dxHost float3 clamp(float3 v, float a, float b) 00295 { 00296 return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); 00297 } 00298 00299 inline dxDevice dxHost float3 clamp(float3 v, float3 a, float3 b) 00300 { 00301 return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); 00302 } 00303 00304 // dot product 00305 inline dxHost dxDevice float dot(const float3& a, const float3& b) 00306 { 00307 return a.x * b.x + a.y * b.y + a.z * b.z; 00308 } 00309 00310 inline dxHost dxDevice double dot(const double3& a, const double3& b) 00311 { 00312 return a.x * b.x + a.y * b.y + a.z * b.z; 00313 } 00314 // dot product 00315 inline dxHost dxDevice float dot(const float3& a, const float4& b) 00316 { 00317 return a.x * b.x + a.y * b.y + a.z * b.z; 00318 } 00319 00320 inline dxHost dxDevice double dot(const double3& a, const double4& b) 00321 { 00322 return a.x * b.x + a.y * b.y + a.z * b.z; 00323 } 00324 // dot product 00325 inline dxHost dxDevice float dot(const float4& a, const float4& b) 00326 { 00327 return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 00328 } 00329 00330 inline dxHost dxDevice double dot(const double4& a, const double4& b) 00331 { 00332 return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 00333 } 00334 00335 // cross product 00336 inline dxHost dxDevice float3 cross(float3 a, float3 b) 00337 { 00338 return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); 00339 } 00340 00341 // length 00342 inline dxHost dxDevice float length(float3 v) 00343 { 00344 return sqrtf(dot(v, v)); 00345 } 00346 00347 // normalize 00348 inline dxHost dxDevice 
float3 normalize(float3 v) 00349 { 00350 float invLen = 1.0f / sqrtf(dot(v, v)); 00351 return v * invLen; 00352 } 00353 00354 // floor 00355 inline dxHost dxDevice float3 floor(const float3 v) 00356 { 00357 return make_float3(floor(v.x), floor(v.y), floor(v.z)); 00358 } 00359 00360 // float4 functions 00362 00363 // additional constructors 00364 inline dxHost dxDevice float4 make_float4(float s) 00365 { 00366 return make_float4(s, s, s, s); 00367 } 00368 inline dxHost dxDevice float4 make_float4(float3 a) 00369 { 00370 return make_float4(a.x, a.y, a.z, 0.0f); 00371 } 00372 inline dxHost dxDevice float4 make_float4(float3 a, float w) 00373 { 00374 return make_float4(a.x, a.y, a.z, w); 00375 } 00376 inline dxHost dxDevice float4 make_float4(const float& a, const float& b, const float& c) 00377 { 00378 return make_float4((float)a, (float)b, (float)c); 00379 } 00380 inline dxHost dxDevice float4 make_float4(int4 a) 00381 { 00382 return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); 00383 } 00384 00385 inline dxHost dxDevice double4 make_double4(double s) 00386 { 00387 return make_double4(s, s, s, s); 00388 } 00389 inline dxHost dxDevice double4 make_double4(double3 a) 00390 { 00391 return make_double4(a.x, a.y, a.z, 0.0f); 00392 } 00393 inline dxHost dxDevice double4 make_double4(double3 a, double w) 00394 { 00395 return make_double4(a.x, a.y, a.z, w); 00396 } 00397 inline dxHost dxDevice double4 make_double4(const double& a, const double& b, const double& c) 00398 { 00399 return make_double4((double)a, (double)b, (double)c); 00400 } 00401 inline dxHost dxDevice double4 make_double4(int4 a) 00402 { 00403 return make_double4(double(a.x), double(a.y), double(a.z), double(a.w)); 00404 } 00405 inline dxHost dxDevice double4 make_fdouble4(double s) 00406 { 00407 double4 d; 00408 d.x = s; 00409 d.y = s; 00410 d.z = s; 00411 d.w = s; 00412 float* f; 00413 //f = reinterpret_cast<float4*>(&d); 00414 f = (float*)(&(d.x)); *f = (float)s; 00415 f = (float*)(&(d.y)); 
*f = (float)s; 00416 f = (float*)(&(d.z)); *f = (float)s; 00417 f = (float*)(&(d.w)); *f = (float)s; 00418 return d; 00419 } 00420 00421 00422 // negate 00423 inline dxHost dxDevice float4 operator-(float4 &a) 00424 { 00425 return make_float4(-a.x, -a.y, -a.z, -a.w); 00426 } 00427 00428 // min 00429 static __inline__ dxHost dxDevice float4 fminf(float4 a, float4 b) 00430 { 00431 return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w)); 00432 } 00433 00434 // max 00435 static __inline__ dxHost dxDevice float4 fmaxf(float4 a, float4 b) 00436 { 00437 return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w)); 00438 } 00439 00440 // addition 00441 inline dxHost dxDevice float4 operator+(float4 a, float4 b) 00442 { 00443 return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); 00444 } 00445 inline dxHost dxDevice double4 operator+(double4 a, double4 b) 00446 { 00447 return make_double4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); 00448 } 00449 inline dxHost dxDevice void operator+=(float4 &a, float4 b) 00450 { 00451 a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; 00452 } 00453 inline dxHost dxDevice void operator+=(double4 &a, double4 b) 00454 { 00455 a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; 00456 } 00457 00458 // subtract 00459 inline dxHost dxDevice float4 operator-(float4 a, float4 b) 00460 { 00461 return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); 00462 } 00463 inline dxHost dxDevice void operator-=(float4 &a, float4 b) 00464 { 00465 a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; 00466 } 00467 00468 // multiply 00469 template <typename T> inline dxHost dxDevice typename vec4<T>::Type operator*(typename vec4<T>::Type a, T s) 00470 { 00471 return make_vec4(a.x * s, a.y * s, a.z * s, a.w * s); 00472 } 00473 inline dxHost dxDevice float4 operator*(float s, float4 a) 00474 { 00475 return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); 00476 } 00477 inline dxHost dxDevice void operator*=(float4 
&a, float s) 00478 { 00479 a.x *= s; a.y *= s; a.z *= s; a.w *= s; 00480 } 00481 inline dxHost dxDevice void operator*=(double4 &a, double s) 00482 { 00483 a.x *= s; a.y *= s; a.z *= s; a.w *= s; 00484 } 00485 00486 // divide 00487 inline dxHost dxDevice float4 operator/(float4 a, float4 b) 00488 { 00489 return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); 00490 } 00491 inline dxHost dxDevice float4 operator/(float4 a, float s) 00492 { 00493 float inv = 1.0f / s; 00494 return a * inv; 00495 } 00496 inline dxHost dxDevice float4 operator/(float s, float4 a) 00497 { 00498 float inv = 1.0f / s; 00499 return a * inv; 00500 } 00501 inline dxHost dxDevice void operator/=(float4 &a, float s) 00502 { 00503 float inv = 1.0f / s; 00504 a *= inv; 00505 } 00506 00507 // clamp 00508 inline dxDevice dxHost float4 clamp(float4 v, float a, float b) 00509 { 00510 return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); 00511 } 00512 00513 inline dxDevice dxHost float4 clamp(float4 v, float4 a, float4 b) 00514 { 00515 return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); 00516 } 00517 00518 // dot product 00519 template <typename T> inline dxHost dxDevice T dot(typename vec4<T>::Type a, typename vec4<T>::Type b) 00520 { 00521 return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 00522 } 00523 00524 // length 00525 inline dxHost dxDevice float length(float4 r) 00526 { 00527 return sqrtf(dot<float>(r, r)); 00528 } 00529 00530 // normalize 00531 inline dxHost dxDevice float4 normalize(float4 v) 00532 { 00533 float invLen = 1.0f / sqrtf(dot<float>(v, v)); 00534 return v * invLen; 00535 } 00536 00537 // floor 00538 inline dxHost dxDevice float4 floor(const float4 v) 00539 { 00540 return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w)); 00541 } 00542 00543 inline dxHost dxDevice vec3<float>::Type make_vec3(float a, float b, float c) { 00544 return make_float3(a,b,c); 00545 } 
00546 00547 inline dxHost dxDevice vec4<float>::Type make_vec4(const float& a, const float& b, const float& c) { 00548 return make_float4(a,b,c,(float)0.0); 00549 } 00550 00551 inline dxHost dxDevice vec4<double>::Type make_vec4(const double& a, const double& b, const double& c) { 00552 return make_double4(a,b,c,(double)0.0); 00553 } 00554 00555 inline dxHost dxDevice vec4<float>::Type make_vec4(float a, float b, float c, float d) { 00556 return make_float4(a,b,c,d); 00557 } 00558 00559 inline dxHost dxDevice vec4<double>::Type make_vec4(double a, double b, double c, double d) { 00560 return make_double4(a,b,c,d); 00561 } 00562 inline dxHost dxDevice vec3<double>::Type make_vec3(double a, double b, double c) { 00563 return make_double3(a,b,c); 00564 } 00565 00566 inline dxHost dxDevice vec4<float>::Type make_vec4( float3 a ) { return make_float4(a); } 00567 inline dxHost dxDevice vec4<double>::Type make_vec4( double3 a ) { return make_double4(a); } 00568 00569 inline dxHost dxDevice vec4<float>::Type make_vec4( float a ) { return make_float4(a); } 00570 inline dxHost dxDevice vec4<double>::Type make_vec4( double a ) { return make_double4(a); } 00571 00572 inline dxHost dxDevice vec3<float>::Type make_vec3( float4 a ) { return make_float3(a); } 00573 inline dxHost dxDevice vec3<double>::Type make_vec3( double4 a ) { return make_double3(a); } 00574 00575 inline dxHost dxDevice vec3<float>::Type make_vec3( float a ) { return make_float3(a); } 00576 inline dxHost dxDevice vec3<double>::Type make_vec3( double a ) { return make_double3(a); } 00577 00578 00579 #endif