00001 #ifndef PARALLEL_UTILS_H 00002 #define PARALLEL_UTILS_H 00003 00004 #include "parallel_common.h" 00005 00006 #include <cstdlib> 00007 #include <cstdio> 00008 #include <string.h> 00009 #include <vector> 00010 #include <limits.h> 00011 00012 namespace parallel_utils 00013 { 00014 00015 #define VectorType ::std::vector 00016 00017 #ifdef VERBOSE 00018 #define IFVERBOSE(x) x 00019 #else 00020 #define IFVERBOSE(x) ((void)0) 00021 #endif 00022 00023 #ifdef TIMING 00024 #define IFTIMING(x) x 00025 #else 00026 #define IFTIMING(x) ((void)0) 00027 #endif 00028 00029 #ifdef BENCHMARKING 00030 #define IFBENCHMARKING(x) x 00031 #else 00032 #define IFBENCHMARKING(x) 00033 #endif 00034 00035 #ifdef ERROR 00036 #define COMPUTE_ERROR 1 00037 #else 00038 #define COMPUTE_ERROR 0 00039 #endif 00040 00042 00043 #define alignedSize(a) __alignedSize(a) 00044 #define alignSize(offset,alignment) (((offset) + (alignment) - 1) & ~ ((alignment) - 1)) 00045 #define alignDefaultSize(offset) alignSize(offset,ParallelOptions::DEFAULTALIGN) 00046 #define alignOffset(offset,alignment) (offset) = alignSize(offset,alignment) 00047 #define align(offset) alignOffset(offset,ParallelOptions::DEFAULTALIGN) 00048 00050 00051 inline int __alignedSize( VectorType<int>& vectorToAlign ) 00052 { 00053 int totalSize = 0; 00054 for( size_t i = 0; i < vectorToAlign.size(); i++ ) 00055 { 00056 totalSize += vectorToAlign[i]; 00057 align(totalSize); 00058 } 00059 return totalSize; 00060 } 00061 00062 inline int iDivUp(int a, int b) 00063 { 00064 return (a % b != 0) ? (a / b + 1) : (a / b); 00065 } 00066 00067 inline void computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads) 00068 { 00069 numThreads = std::min(blockSize, n); 00070 numBlocks = iDivUp(n, numThreads); 00071 } 00072 00073 inline int computeStride(int n, int blockSize) 00074 { 00075 return iDivUp(n, blockSize) * blockSize; 00076 } 00077 00078 template <typename T> 00079 inline int computeElementsPerAlign( int align ) 00080 { 00081 return align / sizeof( T ); 00082 } 00083 00084 template <typename T> 00085 inline T iPower2Up(T k) 00086 { 00087 --k; 00088 for (unsigned int i=1; i<sizeof(T)*CHAR_BIT; i<<=1) 00089 k = k | k >> i; 00090 return k+1; 00091 } 00092 00093 template <typename T> 00094 inline T iPower2UpUnsigned(T k) { 00095 if (k == 0) 00096 return 1; 00097 --k; 00098 for (int i=1; i<sizeof(T)*CHAR_BIT; i<<=1) 00099 k = k | k >> i; 00100 return k+1; 00101 } 00102 00103 template <typename T> 00104 inline void fillSequentialVector( VectorType<T>& vectorToFill ) 00105 { 00106 for( size_t i = 0; i < vectorToFill.size(); ++i ) 00107 vectorToFill[ i ] = (T)i; 00108 } 00109 00110 template <typename T> 00111 inline void fillStridedVector( VectorType<T>& vectorToFill, T stride ) 00112 { 00113 for( size_t i = 0; i < vectorToFill.size(); ++i ) 00114 vectorToFill[ i ] = (T)i * stride; 00115 } 00116 00117 template <typename T> 00118 inline void permuteVector( VectorType<T>& vectorToPermute ) 00119 { 00120 for( size_t i = 0; i < vectorToPermute.size(); i++ ) 00121 { 00122 size_t j = rand() % (i + 1); 00123 T temp = vectorToPermute[ j ]; 00124 vectorToPermute[ j ] = vectorToPermute[ i ]; 00125 vectorToPermute[ i ] = temp; 00126 } 00127 } 00128 00129 inline unsigned int iPower2UpUnrolled(unsigned int v) 00130 { 00131 --v; 00132 v |= v >> 1; 00133 v |= v >> 2; 00134 v |= v >> 4; 00135 v |= v >> 8; 00136 v |= v >> 16; 00137 return ++v; 00138 } 00139 00140 inline void printIntVector( const IntVector& intVector ) { 00141 printf("{"); 00142 for(IntVector::const_iterator it = intVector.begin(); it != intVector.end(); ++it) { 00143 printf(" %d ", *it); 00144 } 00145 printf("}\n"); 00146 } 00147 00148 } 00149 00150 #endif