$search
00001 #include <opencl_solver.h> 00002 #include <parallel_utils.h> 00003 00004 #include "opencl_kernels.h" 00005 00006 namespace parallel_ode 00007 { 00008 00009 template class OpenCLPGSSolver<dReal>; 00010 00011 template<typename T> 00012 void OpenCLPGSSolver<T>::initialize( ) 00013 { 00014 ParallelPGSSolver<T,T,ParallelTypes::OpenCL>::initialize( ); 00015 } 00016 00017 template<typename T> 00018 void OpenCLPGSSolver<T>::solveAndReduce( const int offset, const int batchSize ) 00019 { 00020 oclPGSSolve( offset, 00021 batchSize, 00022 this->atomicsEnabled( ) ); 00023 00024 if( this->reduceEnabled( ) ) { 00025 oclPGSReduce( this->bodyFAccReduction.getDeviceBuffer( ), 00026 this->bodyTAccReduction.getDeviceBuffer( ), 00027 this->reduceStrategy_ ); 00028 } 00029 } 00030 00031 template<typename T> 00032 void OpenCLPGSSolver<T>::loadConstraints( ) 00033 { 00034 ParallelPGSSolver<T,T,ParallelTypes::OpenCL>::loadConstraints( ); 00035 00036 // Zero out the force accumulation vector 00037 if( this->reduceEnabled( ) ) { 00038 oclZeroVector(this->bodyFAccReduction.getDeviceBuffer( ), this->bodyFAccReduction.getSize( ), false); 00039 oclZeroVector(this->bodyTAccReduction.getDeviceBuffer( ), this->bodyTAccReduction.getSize( ), false); 00040 } 00041 } 00042 00043 template<typename T> 00044 void OpenCLPGSSolver<T>::loadKernels( ) 00045 { 00046 oclPGSSolveInit( this->bodyIDs.getDeviceBuffer( ), 00047 this->fIDs.getDeviceBuffer( ), 00048 this->j0.getDeviceBuffer( ), 00049 this->ij0.getDeviceBuffer( ), 00050 this->bodyFAcc.getDeviceBuffer( ), 00051 this->bodyTAcc.getDeviceBuffer( ), 00052 this->bodyFAccReduction.getDeviceBuffer( ), 00053 this->bodyTAccReduction.getDeviceBuffer( ), 00054 this->lambda0.getDeviceBuffer( ), 00055 this->adcfm.getDeviceBuffer( ), 00056 this->rhs.getDeviceBuffer( ), 00057 this->lohiD.getDeviceBuffer( ), 00058 this->getBodyStride( ), 00059 this->getConstraintStride( ), 00060 this->reduceStrategy_ ); 00061 00062 } 00063 00064 }