00001 #ifndef PARALLEL_KERNELS_NONTEMPLATE_H
00002 #define PARALLEL_KERNELS_NONTEMPLATE_H
00003
/*
 * Strided index helpers: evaluate to index + stride * i.
 *
 * C_ID uses a variable named cStride and B_ID a variable named bStride;
 * that name must be in scope wherever the macro is expanded (the kernels
 * in this file receive both as parameters).
 *
 * Both arguments are parenthesized so that expressions built from
 * lower-precedence operators (e.g. `a & b`, `c ? x : y`) expand correctly.
 */
#define C_ID(index,i) ((index)+(cStride*(i)))
#define B_ID(index,i) ((index)+(bStride*(i)))
00006
00007 dxGlobal void
00008 parallelZero( dxDeviceData dReal *buffer, int bufferSize )
00009 {
00010 int index = dxGlobalIdxX();
00011
00012 if( index < bufferSize )
00013 buffer[ index ] = dParallelZero;
00014 }
00015
00016 dxGlobal void
00017 parallelZero4( dxDeviceData dReal4 *buffer, int bufferSize )
00018 {
00019 int index = dxGlobalIdxX();
00020
00021 if( index < bufferSize )
00022 buffer[ index ] = make_real4( dParallelZero );
00023 }
00024
00025 dxGlobal void
00026 parallelSORLCP( dxDeviceData dReal4 *fc0_reduction,
00027 dxDeviceData dReal4 *fc1_reduction,
00028 dxDeviceData dReal *lambda,
00029 dxDeviceData const int4 *bodyIDs,
00030 dxDeviceData const int *fIDs,
00031 dxDeviceData const dReal4 *j,
00032 dxDeviceData const dReal4 *ij,
00033 dxDeviceData const dReal4 *fc0,
00034 dxDeviceData const dReal4 *fc1,
00035 dxDeviceData const dReal *adcfm,
00036 dxDeviceData const dReal *rhs,
00037 dxDeviceData const dReal *lohi,
00038 const int offset,
00039 const int numConstraints,
00040 const int bStride,
00041 const int cStride )
00042 {
00043 int index = dxGlobalIdxX();
00044
00045 if( index >= numConstraints )
00046 return;
00047
00048 index += offset;
00049
00050 dReal old_lambda = lambda[ index ];
00051
00052 int4 bodyID = bodyIDs[ index ];
00053
00054 dReal4 fc00 = fc0[ bodyID.x ];
00055 dReal4 fc01 = fc1[ bodyID.x ];
00056 dReal4 fc10 = make_real4( dParallelZero );
00057 dReal4 fc11 = make_real4( dParallelZero );
00058
00059 dReal4 j0_temp = j[ index ];
00060 dReal4 j1_temp = j[ C_ID(index,1) ];
00061
00062 dReal delta = rhs[ index ] - old_lambda * adcfm[ index ];
00063
00064 if( bodyID.y >= 0 ) {
00065 fc10 = fc0[ bodyID.y ];
00066 fc11 = fc1[ bodyID.y ];
00067 }
00068
00069 {
00070 delta -= dot( fc00, j0_temp );
00071 delta -= dot( fc01, j1_temp );
00072 if (bodyID.y >= 0) {
00073 dReal4 j2_temp = j[ C_ID(index,2) ];
00074 dReal4 j3_temp = j[ C_ID(index,3) ];
00075 delta -= dot( fc10, j2_temp );
00076 delta -= dot( fc11, j3_temp );
00077 }
00078 }
00079
00080 {
00081 dReal lo_act = lohi[ index ];
00082 dReal hi_act = lohi[ C_ID(index,1) ];
00083
00084 int fID = fIDs[ index ];
00085 if (fID >= 0) {
00086 hi_act = fabs( hi_act * lambda[ fID ]);
00087 lo_act = -hi_act;
00088 }
00089
00090 dReal new_lambda = old_lambda + delta;
00091 dReal final_lambda = new_lambda;
00092
00093 if (new_lambda < lo_act) {
00094 delta = lo_act-old_lambda;
00095 final_lambda = lo_act;
00096 }
00097 else if (new_lambda > hi_act) {
00098 delta = hi_act-old_lambda;
00099 final_lambda = hi_act;
00100 }
00101 lambda[ index ] = final_lambda;
00102 }
00103
00104 j0_temp = ij[ index ];
00105 j1_temp = ij[ C_ID(index,1) ];
00106
00107 {
00108 j0_temp *= delta;
00109 j1_temp *= delta;
00110
00111 fc0_reduction[ bodyID.z ] += j0_temp;
00112 fc1_reduction[ bodyID.z ] += j1_temp;
00113
00114 if( bodyID.y >= 0 ) {
00115 dReal4 j2_temp = ij[ C_ID(index,2) ];
00116 dReal4 j3_temp = ij[ C_ID(index,3) ];
00117
00118 j2_temp *= delta;
00119 j3_temp *= delta;
00120
00121 fc0_reduction[ bodyID.w ] += j2_temp;
00122 fc1_reduction[ bodyID.w ] += j3_temp;
00123 }
00124 }
00125 }
00126
00127 dxGlobal void
00128 parallelReduce( dxDeviceData dReal4 *fc0,
00129 dxDeviceData dReal4 *fc1,
00130 dxDeviceData const dReal4 *fc0_reduction,
00131 dxDeviceData const dReal4 *fc1_reduction,
00132 const int reductionStride,
00133 const int bodySize,
00134 const int reductionSize )
00135 {
00136 const int index = dxGlobalIdxX();
00137 if( index >= bodySize ) return;
00138
00139 int nextIndex = index + reductionStride;
00140
00141 dReal4 sum0 = fc0_reduction[ index ];
00142 dReal4 sum1 = fc1_reduction[ index ];
00143
00144 while(nextIndex < reductionSize) {
00145 sum0 += fc0_reduction[ nextIndex ];
00146 sum1 += fc1_reduction[ nextIndex ];
00147 nextIndex += reductionStride;
00148 }
00149
00150 fc0[ index ] += sum0;
00151 fc1[ index ] += sum1;
00152 }
00153
00154 #endif