00001 #include <cuda_solver.h>
00002 #include <parallel_utils.h>
00003 #include <cuda_utils.h>
00004
00005 #include "cuda_kernels.cuh"
00006
00007 namespace parallel_ode
00008 {
00009
00010 #ifdef CUDA_DOUBLESUPPORT
00011 template class CudaPGSSolver<dReal,dReal>;
00012 #else
00013 template class CudaPGSSolver<float,dReal>;
00014 #endif
00015
00016 template<typename CudaT,typename ParamsT>
00017 void CudaPGSSolver<CudaT,ParamsT>::initialize( )
00018 {
00019 ParallelPGSSolver<CudaT,ParamsT,ParallelTypes::CUDA>::initialize( );
00020
00021 MemFlags flags = cudaHostAllocDefault;
00022 flags |= this->wcMemEnabled() ? cudaHostAllocWriteCombined : 0;
00023 flags |= this->pinnedMemEnabled() ? cudaHostAllocPortable : 0;
00024 this->setMemFlags( flags );
00025 }
00026
00027 template<typename CudaT,typename ParamsT>
00028 void CudaPGSSolver<CudaT,ParamsT>::preProcessDevice( const CudaT sorParam, const CudaT stepSize )
00029 {
00031 }
00032
00033 template<typename CudaT,typename ParamsT>
00034 void CudaPGSSolver<CudaT,ParamsT>::solveAndReduce( const int offset, const int batchSize )
00035 {
00036
00037 cudaPGSSolve<CudaT>( this->bodyIDs.getDeviceBuffer( ),
00038 this->fIDs.getDeviceBuffer( ),
00039 this->j0.getDeviceBuffer( ),
00040 this->ij0.getDeviceBuffer( ),
00041 this->bodyFAcc.getDeviceBuffer( ),
00042 this->bodyTAcc.getDeviceBuffer( ),
00043 this->bodyFAccReduction.getDeviceBuffer( ),
00044 this->bodyTAccReduction.getDeviceBuffer( ),
00045 this->lambda0.getDeviceBuffer( ),
00046 this->adcfm.getDeviceBuffer( ),
00047 this->rhs.getDeviceBuffer( ),
00048 this->lohiD.getDeviceBuffer( ),
00049 offset,
00050 batchSize,
00051 this->atomicsEnabled( ),
00052 this->getBodyStride( ),
00053 this->getConstraintStride( ) );
00054
00055 if( this->reduceEnabled( ) ) {
00056 cudaPGSReduce<CudaT>( this->bodyFAcc.getDeviceBuffer( ),
00057 this->bodyTAcc.getDeviceBuffer( ),
00058 this->bodyFAccReduction.getDeviceBuffer( ),
00059 this->bodyTAccReduction.getDeviceBuffer( ),
00060 this->reduceStrategy_ );
00061 }
00062 }
00063
00064 template<typename CudaT,typename ParamsT>
00065 void CudaPGSSolver<CudaT,ParamsT>::loadConstraints( )
00066 {
00067 ParallelPGSSolver<CudaT,ParamsT, ParallelTypes::CUDA>::loadConstraints( );
00068
00069
00070 if( this->reduceEnabled( ) ) {
00071 cudaZeroVector<Vec4T>(this->bodyFAccReduction.getDeviceBuffer( ), this->bodyFAccReduction.getSize( ));
00072 cudaZeroVector<Vec4T>(this->bodyTAccReduction.getDeviceBuffer( ), this->bodyTAccReduction.getSize( ));
00073 }
00074 }
00075
00076 }