parallel_quickstep: parallel_quickstep: parallel

00001 #ifndef CUDA_MATH_H
00002 #define CUDA_MATH_H
00003 
00004 #include <stdio.h>
00005 
00006 #include "parallel_common.h"
00007 
00008 template <typename T> struct vec3         { typedef float   Type; typedef float* PtrType; }; // dummy
00009 template <>           struct vec3<float>  { typedef float3  Type; typedef float3* PtrType; };
00010 template <>           struct vec3<double> { typedef double3 Type; typedef double3* PtrType; };
00011 
00012 template <typename T> struct vec4         { typedef float   Type; typedef float* PtrType; }; // dummy
00013 template <>           struct vec4<float>  { typedef float4  Type; typedef float4* PtrType; };
00014 template <>           struct vec4<double> { typedef double4 Type; typedef double4* PtrType; };
00015 
00016 template <typename T>
00017 inline dxDevice T readAndReplace(T* buffer, const T& element) {
00018   T value = *buffer;
00019   *buffer = element;
00020   return value;
00021 }
00022 
00023 inline dxHost dxDevice void add_assign_volatile(volatile float3& a, float3& b, volatile float3& c) {
00024   a.x = b.x = b.x + c.x;
00025   a.y = b.y = b.y + c.y;
00026   a.z = b.z = b.z + c.z;
00027 }
00028 inline dxHost dxDevice void add_assign_volatile(volatile double3& a, double3& b, volatile double3& c) {
00029   a.x = b.x = b.x + c.x;
00030   a.y = b.y = b.y + c.y;
00031   a.z = b.z = b.z + c.z;
00032 }
00033 
00034 inline dxHost dxDevice void add_assign_volatile(volatile float4& a, float4& b, volatile float4& c) {
00035   a.x = b.x = b.x + c.x;
00036   a.y = b.y = b.y + c.y;
00037   a.z = b.z = b.z + c.z;
00038 }
00039 inline dxHost dxDevice void add_assign_volatile(volatile double4& a, double4& b, volatile double4& c) {
00040   a.x = b.x = b.x + c.x;
00041   a.y = b.y = b.y + c.y;
00042   a.z = b.z = b.z + c.z;
00043 }
00044 
00045 inline dxHost dxDevice void assign_volatile(volatile float3& a, float3& b) {
00046   a.x = b.x; a.y = b.y; a.z = b.z;
00047 }
00048 inline dxHost dxDevice void assign_volatile(volatile double3& a, double3& b) {
00049   a.x = b.x; a.y = b.y; a.z = b.z;
00050 }
00051 
00052 inline dxHost dxDevice void make_zero(float3& a) {
00053   a.x = a.y = a.z = 0.0f;
00054 }
00055 inline dxHost dxDevice void make_zero(double3& a) {
00056   a.x = a.y = a.z = 0.0;
00057 }
00058 inline dxHost dxDevice void make_zero(float4& a) {
00059   a.x = a.y = a.z = a.w = 0.0f;
00060 }
00061 inline dxHost dxDevice void make_zero(double4& a) {
00062   a.x = a.y = a.z = a.w = 0.0;
00063 }
00064 
00065 #ifndef __CUDACC__
00066 #include <math.h>
00067 
// Host-side fallback: returns the smaller of two floats
// (b is returned when a is not strictly less than b).
inline float fminf(float a, float b)
{
  if (a < b) {
    return a;
  }
  return b;
}
00072 
// Host-side fallback: returns the larger of two floats.
//
// The previous implementation used `a < b ? a : b`, which is the *minimum*;
// as a consequence every host-side clamp() built on fmaxf() was wrong.
// The comparison now mirrors the integer max() helper in this header.
//
// @param a first operand
// @param b second operand
// @return a when a > b, otherwise b
inline float fmaxf(float a, float b)
{
  return a > b ? a : b;
}
00077 
// Host-side fallback: returns the larger of two ints.
inline int max(int a, int b)
{
  if (a > b) {
    return a;
  }
  return b;
}
00082 
// Host-side fallback: returns the smaller of two ints.
inline int min(int a, int b)
{
  if (a < b) {
    return a;
  }
  return b;
}
00087 
00088 #else
00089 
00090 #ifdef CUDA_ATOMICSUPPORT
00091 template <>
00092 dxDevice inline float readAndReplace<float>(float* buffer, const float& element) {
00093   return atomicExch(buffer, element);
00094 }
00095 #endif
00096 
00097 #endif
00098 
00099 // float functions
00101 
00102 // clamp
00103 inline dxDevice dxHost float clamp(float f, float a, float b)
00104 {
00105   return fmaxf(a, fminf(f, b));
00106 }
00107 
00108 // clamp
00109 inline dxDevice dxHost double clamp(double f, double a, double b)
00110 {
00111   return fmax(a, fmin(f, b));
00112 }
00113 
00114 // int2 functions
00116 
00117 // negate
00118 inline dxHost dxDevice int2 operator-(int2 &a)
00119 {
00120   return make_int2(-a.x, -a.y);
00121 }
00122 
00123 // addition
00124 inline dxHost dxDevice int2 operator+(int2 a, int2 b)
00125 {
00126   return make_int2(a.x + b.x, a.y + b.y);
00127 }
00128 inline dxHost dxDevice void operator+=(int2 &a, int2 b)
00129 {
00130   a.x += b.x; a.y += b.y;
00131 }
00132 
00133 // subtract
00134 inline dxHost dxDevice int2 operator-(int2 a, int2 b)
00135 {
00136   return make_int2(a.x - b.x, a.y - b.y);
00137 }
00138 inline dxHost dxDevice void operator-=(int2 &a, int2 b)
00139 {
00140   a.x -= b.x; a.y -= b.y;
00141 }
00142 
00143 // multiply
00144 inline dxHost dxDevice int2 operator*(int2 a, int2 b)
00145 {
00146   return make_int2(a.x * b.x, a.y * b.y);
00147 }
00148 inline dxHost dxDevice int2 operator*(int2 a, int s)
00149 {
00150   return make_int2(a.x * s, a.y * s);
00151 }
00152 inline dxHost dxDevice int2 operator*(int s, int2 a)
00153 {
00154   return make_int2(a.x * s, a.y * s);
00155 }
00156 inline dxHost dxDevice void operator*=(int2 &a, int s)
00157 {
00158   a.x *= s; a.y *= s;
00159 }
00160 
00161 // float3 functions
00163 
00164 // additional constructors
00165 inline dxHost dxDevice float3 make_float3(float s)
00166 {
00167   return make_float3(s, s, s);
00168 }
00169 inline dxHost dxDevice float3 make_float3(float4 a)
00170 {
00171   return make_float3(a.x, a.y, a.z);  // discards w
00172 }
00173 inline dxHost dxDevice float3 make_float3(int3 a)
00174 {
00175   return make_float3(float(a.x), float(a.y), float(a.z));
00176 }
00177 
00178 inline dxHost dxDevice double3 make_double3(double s)
00179 {
00180   return make_double3(s, s, s);
00181 }
00182 
00183 inline dxHost dxDevice double3 make_double3(double4 a)
00184 {
00185   return make_double3(a.x, a.y, a.z);  // discards w
00186 }
00187 inline dxHost dxDevice double3 make_double3(int3 a)
00188 {
00189   return make_double3(double(a.x), double(a.y), double(a.z));
00190 }
00191 
00192 // negate
00193 inline dxHost dxDevice float3 operator-(float3 &a)
00194 {
00195   return make_float3(-a.x, -a.y, -a.z);
00196 }
00197 
00198 // min
00199 static __inline__ dxHost dxDevice float3 fminf(float3 a, float3 b)
00200 {
00201   return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
00202 }
00203 
00204 // max
00205 static __inline__ dxHost dxDevice float3 fmaxf(float3 a, float3 b)
00206 {
00207   return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
00208 }
00209 
00210 // addition
00211 inline dxHost dxDevice float3 operator+(float3 a, float3 b)
00212 {
00213   return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
00214 }
00215 inline dxHost dxDevice double3 operator+(double3 a, double3 b)
00216 {
00217   return make_double3(a.x + b.x, a.y + b.y, a.z + b.z);
00218 }
00219 inline dxHost dxDevice float3 operator+(float3 a, float b)
00220 {
00221   return make_float3(a.x + b, a.y + b, a.z + b);
00222 }
00223 inline dxHost dxDevice double3 operator+(double3 a, double b)
00224 {
00225   return make_double3(a.x + b, a.y + b, a.z + b);
00226 }
00227 inline dxHost dxDevice void operator+=(float3 &a, float3 b)
00228 {
00229   a.x += b.x; a.y += b.y; a.z += b.z;
00230 }
00231 inline dxHost dxDevice void operator+=(double3 &a, double3 b)
00232 {
00233   a.x += b.x; a.y += b.y; a.z += b.z;
00234 }
00235 
00236 // subtract
00237 inline dxHost dxDevice float3 operator-(float3 a, float3 b)
00238 {
00239   return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
00240 }
00241 inline dxHost dxDevice float3 operator-(float3 a, float b)
00242 {
00243   return make_float3(a.x - b, a.y - b, a.z - b);
00244 }
00245 inline dxHost dxDevice void operator-=(float3 &a, float3 b)
00246 {
00247   a.x -= b.x; a.y -= b.y; a.z -= b.z;
00248 }
00249 
00250 // multiply
00251 inline dxHost dxDevice float3 operator*(float3 a, float3 b)
00252 {
00253   return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
00254 }
00255 inline dxHost dxDevice float3 operator*(float3 a, float s)
00256 {
00257   return make_float3(a.x * s, a.y * s, a.z * s);
00258 }
00259 inline dxHost dxDevice float3 operator*(float s, float3 a)
00260 {
00261   return make_float3(a.x * s, a.y * s, a.z * s);
00262 }
00263 inline dxHost dxDevice void operator*=(float3 &a, float s)
00264 {
00265   a.x *= s; a.y *= s; a.z *= s;
00266 }
00267 inline dxHost dxDevice void operator*=(double3 &a, double s)
00268 {
00269   a.x *= s; a.y *= s; a.z *= s;
00270 }
00271 
00272 // divide
00273 inline dxHost dxDevice float3 operator/(float3 a, float3 b)
00274 {
00275   return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
00276 }
00277 inline dxHost dxDevice float3 operator/(float3 a, float s)
00278 {
00279   float inv = 1.0f / s;
00280   return a * inv;
00281 }
00282 inline dxHost dxDevice float3 operator/(float s, float3 a)
00283 {
00284   float inv = 1.0f / s;
00285   return a * inv;
00286 }
00287 inline dxHost dxDevice void operator/=(float3 &a, float s)
00288 {
00289   float inv = 1.0f / s;
00290   a *= inv;
00291 }
00292 
00293 // clamp
00294 inline dxDevice dxHost float3 clamp(float3 v, float a, float b)
00295 {
00296   return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
00297 }
00298 
00299 inline dxDevice dxHost float3 clamp(float3 v, float3 a, float3 b)
00300 {
00301   return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
00302 }
00303 
00304 // dot product
00305 inline dxHost dxDevice float dot(const float3& a, const float3& b)
00306 {
00307   return a.x * b.x + a.y * b.y + a.z * b.z;
00308 }
00309 
00310 inline dxHost dxDevice double dot(const double3& a, const double3& b)
00311 {
00312   return a.x * b.x + a.y * b.y + a.z * b.z;
00313 }
00314 // dot product
00315 inline dxHost dxDevice float dot(const float3& a, const float4& b)
00316 {
00317   return a.x * b.x + a.y * b.y + a.z * b.z;
00318 }
00319 
00320 inline dxHost dxDevice double dot(const double3& a, const double4& b)
00321 {
00322   return a.x * b.x + a.y * b.y + a.z * b.z;
00323 }
00324 // dot product
00325 inline dxHost dxDevice float dot(const float4& a, const float4& b)
00326 {
00327   return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
00328 }
00329 
00330 inline dxHost dxDevice double dot(const double4& a, const double4& b)
00331 {
00332   return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
00333 }
00334 
00335 // cross product
00336 inline dxHost dxDevice float3 cross(float3 a, float3 b)
00337 {
00338   return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
00339 }
00340 
00341 // length
00342 inline dxHost dxDevice float length(float3 v)
00343 {
00344   return sqrtf(dot(v, v));
00345 }
00346 
00347 // normalize
00348 inline dxHost dxDevice float3 normalize(float3 v)
00349 {
00350   float invLen = 1.0f / sqrtf(dot(v, v));
00351   return v * invLen;
00352 }
00353 
00354 // floor
00355 inline dxHost dxDevice float3 floor(const float3 v)
00356 {
00357   return make_float3(floor(v.x), floor(v.y), floor(v.z));
00358 }
00359 
00360 // float4 functions
00362 
00363 // additional constructors
00364 inline dxHost dxDevice float4 make_float4(float s)
00365 {
00366   return make_float4(s, s, s, s);
00367 }
00368 inline dxHost dxDevice float4 make_float4(float3 a)
00369 {
00370   return make_float4(a.x, a.y, a.z, 0.0f);
00371 }
00372 inline dxHost dxDevice float4 make_float4(float3 a, float w)
00373 {
00374   return make_float4(a.x, a.y, a.z, w);
00375 }
00376 inline dxHost dxDevice float4 make_float4(const float& a, const float& b, const float& c)
00377 {
00378   return make_float4((float)a, (float)b, (float)c);
00379 }
00380 inline dxHost dxDevice float4 make_float4(int4 a)
00381 {
00382   return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
00383 }
00384 
00385 inline dxHost dxDevice double4 make_double4(double s)
00386 {
00387   return make_double4(s, s, s, s);
00388 }
00389 inline dxHost dxDevice double4 make_double4(double3 a)
00390 {
00391   return make_double4(a.x, a.y, a.z, 0.0f);
00392 }
00393 inline dxHost dxDevice double4 make_double4(double3 a, double w)
00394 {
00395   return make_double4(a.x, a.y, a.z, w);
00396 }
00397 inline dxHost dxDevice double4 make_double4(const double& a, const double& b, const double& c)
00398 {
00399   return make_double4((double)a, (double)b, (double)c);
00400 }
00401 inline dxHost dxDevice double4 make_double4(int4 a)
00402 {
00403   return make_double4(double(a.x), double(a.y), double(a.z), double(a.w));
00404 }
00405 inline dxHost dxDevice double4 make_fdouble4(double s)
00406 {
00407   double4 d;
00408   d.x = s;
00409   d.y = s;
00410   d.z = s;
00411   d.w = s;
00412   float* f;
00413   //f = reinterpret_cast<float4*>(&d);
00414   f = (float*)(&(d.x)); *f = (float)s;
00415   f = (float*)(&(d.y)); *f = (float)s;
00416   f = (float*)(&(d.z)); *f = (float)s;
00417   f = (float*)(&(d.w)); *f = (float)s;
00418   return d;
00419 }
00420 
00421 
00422 // negate
00423 inline dxHost dxDevice float4 operator-(float4 &a)
00424 {
00425   return make_float4(-a.x, -a.y, -a.z, -a.w);
00426 }
00427 
00428 // min
00429 static __inline__ dxHost dxDevice float4 fminf(float4 a, float4 b)
00430 {
00431   return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
00432 }
00433 
00434 // max
00435 static __inline__ dxHost dxDevice float4 fmaxf(float4 a, float4 b)
00436 {
00437   return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
00438 }
00439 
00440 // addition
00441 inline dxHost dxDevice float4 operator+(float4 a, float4 b)
00442 {
00443   return make_float4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
00444 }
00445 inline dxHost dxDevice double4 operator+(double4 a, double4 b)
00446 {
00447   return make_double4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
00448 }
00449 inline dxHost dxDevice void operator+=(float4 &a, float4 b)
00450 {
00451   a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
00452 }
00453 inline dxHost dxDevice void operator+=(double4 &a, double4 b)
00454 {
00455   a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
00456 }
00457 
00458 // subtract
00459 inline dxHost dxDevice float4 operator-(float4 a, float4 b)
00460 {
00461   return make_float4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
00462 }
00463 inline dxHost dxDevice void operator-=(float4 &a, float4 b)
00464 {
00465   a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
00466 }
00467 
00468 // multiply
00469 template <typename T> inline dxHost dxDevice typename vec4<T>::Type operator*(typename vec4<T>::Type a, T s)
00470 {
00471   return make_vec4(a.x * s, a.y * s, a.z * s, a.w * s);
00472 }
00473 inline dxHost dxDevice float4 operator*(float s, float4 a)
00474 {
00475   return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
00476 }
00477 inline dxHost dxDevice void operator*=(float4 &a, float s)
00478 {
00479   a.x *= s; a.y *= s; a.z *= s; a.w *= s;
00480 }
00481 inline dxHost dxDevice void operator*=(double4 &a, double s)
00482 {
00483   a.x *= s; a.y *= s; a.z *= s; a.w *= s;
00484 }
00485 
00486 // divide
00487 inline dxHost dxDevice float4 operator/(float4 a, float4 b)
00488 {
00489   return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
00490 }
00491 inline dxHost dxDevice float4 operator/(float4 a, float s)
00492 {
00493   float inv = 1.0f / s;
00494   return a * inv;
00495 }
00496 inline dxHost dxDevice float4 operator/(float s, float4 a)
00497 {
00498   float inv = 1.0f / s;
00499   return a * inv;
00500 }
00501 inline dxHost dxDevice void operator/=(float4 &a, float s)
00502 {
00503   float inv = 1.0f / s;
00504   a *= inv;
00505 }
00506 
00507 // clamp
00508 inline dxDevice dxHost float4 clamp(float4 v, float a, float b)
00509 {
00510   return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
00511 }
00512 
00513 inline dxDevice dxHost float4 clamp(float4 v, float4 a, float4 b)
00514 {
00515   return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
00516 }
00517 
00518 // dot product
00519 template <typename T> inline dxHost dxDevice T dot(typename vec4<T>::Type a, typename vec4<T>::Type b)
00520 {
00521   return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
00522 }
00523 
00524 // length
00525 inline dxHost dxDevice float length(float4 r)
00526 {
00527   return sqrtf(dot<float>(r, r));
00528 }
00529 
00530 // normalize
00531 inline dxHost dxDevice float4 normalize(float4 v)
00532 {
00533   float invLen = 1.0f / sqrtf(dot<float>(v, v));
00534   return v * invLen;
00535 }
00536 
00537 // floor
00538 inline dxHost dxDevice float4 floor(const float4 v)
00539 {
00540   return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
00541 }
00542 
00543 inline dxHost dxDevice vec3<float>::Type make_vec3(float a, float b, float c) {
00544   return make_float3(a,b,c);
00545 }
00546 
00547 inline dxHost dxDevice vec4<float>::Type make_vec4(const float& a, const float& b, const float& c) {
00548   return make_float4(a,b,c,(float)0.0);
00549 }
00550 
00551 inline dxHost dxDevice vec4<double>::Type make_vec4(const double& a, const double& b, const double& c) {
00552   return make_double4(a,b,c,(double)0.0);
00553 }
00554 
00555 inline dxHost dxDevice vec4<float>::Type make_vec4(float a, float b, float c, float d) {
00556   return make_float4(a,b,c,d);
00557 }
00558 
00559 inline dxHost dxDevice vec4<double>::Type make_vec4(double a, double b, double c, double d) {
00560   return make_double4(a,b,c,d);
00561 }
00562 inline dxHost dxDevice vec3<double>::Type make_vec3(double a, double b, double c) {
00563   return make_double3(a,b,c);
00564 }
00565 
00566 inline dxHost dxDevice vec4<float>::Type make_vec4( float3 a ) { return make_float4(a); }
00567 inline dxHost dxDevice vec4<double>::Type make_vec4( double3 a ) { return make_double4(a); }
00568 
00569 inline dxHost dxDevice vec4<float>::Type make_vec4( float a ) { return make_float4(a); }
00570 inline dxHost dxDevice vec4<double>::Type make_vec4( double a ) { return make_double4(a); }
00571 
00572 inline dxHost dxDevice vec3<float>::Type make_vec3( float4 a ) { return make_float3(a); }
00573 inline dxHost dxDevice vec3<double>::Type make_vec3( double4 a ) { return make_double3(a); }
00574 
00575 inline dxHost dxDevice vec3<float>::Type make_vec3( float a ) { return make_float3(a); }
00576 inline dxHost dxDevice vec3<double>::Type make_vec3( double a ) { return make_double3(a); }
00577 
00578 
00579 #endif
parallel_math.h