00001 #include <opencl_solver.h>
00002 #include <parallel_utils.h>
00003
00004 #include "opencl_kernels.h"
00005
00006 namespace parallel_ode
00007 {
00008
00009 template class OpenCLPGSSolver<dReal>;
00010
00011 template<typename T>
00012 void OpenCLPGSSolver<T>::initialize( )
00013 {
00014 ParallelPGSSolver<T,T,ParallelTypes::OpenCL>::initialize( );
00015 }
00016
00017 template<typename T>
00018 void OpenCLPGSSolver<T>::solveAndReduce( const int offset, const int batchSize )
00019 {
00020 oclPGSSolve( offset,
00021 batchSize,
00022 this->atomicsEnabled( ) );
00023
00024 if( this->reduceEnabled( ) ) {
00025 oclPGSReduce( this->bodyFAccReduction.getDeviceBuffer( ),
00026 this->bodyTAccReduction.getDeviceBuffer( ),
00027 this->reduceStrategy_ );
00028 }
00029 }
00030
00031 template<typename T>
00032 void OpenCLPGSSolver<T>::loadConstraints( )
00033 {
00034 ParallelPGSSolver<T,T,ParallelTypes::OpenCL>::loadConstraints( );
00035
00036
00037 if( this->reduceEnabled( ) ) {
00038 oclZeroVector(this->bodyFAccReduction.getDeviceBuffer( ), this->bodyFAccReduction.getSize( ), false);
00039 oclZeroVector(this->bodyTAccReduction.getDeviceBuffer( ), this->bodyTAccReduction.getSize( ), false);
00040 }
00041 }
00042
00043 template<typename T>
00044 void OpenCLPGSSolver<T>::loadKernels( )
00045 {
00046 oclPGSSolveInit( this->bodyIDs.getDeviceBuffer( ),
00047 this->fIDs.getDeviceBuffer( ),
00048 this->j0.getDeviceBuffer( ),
00049 this->ij0.getDeviceBuffer( ),
00050 this->bodyFAcc.getDeviceBuffer( ),
00051 this->bodyTAcc.getDeviceBuffer( ),
00052 this->bodyFAccReduction.getDeviceBuffer( ),
00053 this->bodyTAccReduction.getDeviceBuffer( ),
00054 this->lambda0.getDeviceBuffer( ),
00055 this->adcfm.getDeviceBuffer( ),
00056 this->rhs.getDeviceBuffer( ),
00057 this->lohiD.getDeviceBuffer( ),
00058 this->getBodyStride( ),
00059 this->getConstraintStride( ),
00060 this->reduceStrategy_ );
00061
00062 }
00063
00064 }