00001 #include <openmp_solver.h>
00002 #include <parallel_utils.h>
00003
00004 #include "openmp_kernels.h"
00005
00006 namespace parallel_ode
00007 {
00008
00009 template class OpenMPPGSSolver<dReal>;
00010
00011 template<typename T>
00012 void OpenMPPGSSolver<T>::solveAndReduce( const int offset, const int batchSize )
00013 {
00014 ompPGSSolve<T>( this->bodyIDs.getHostBuffer( ),
00015 this->fIDs.getHostBuffer( ),
00016 this->j0.getHostBuffer( ),
00017 this->ij0.getHostBuffer( ),
00018 this->bodyFAcc.getHostBuffer( ),
00019 this->bodyTAcc.getHostBuffer( ),
00020 this->bodyFAccReduction.getHostBuffer( ),
00021 this->bodyTAccReduction.getHostBuffer( ),
00022 this->lambda0.getHostBuffer( ),
00023 this->adcfm.getHostBuffer( ),
00024 this->rhs.getHostBuffer( ),
00025 this->lohiD.getHostBuffer( ),
00026 offset,
00027 batchSize,
00028 this->atomicsEnabled( ),
00029 this->getBodyStride( ),
00030 this->getConstraintStride( ) );
00031
00032 if( this->reduceEnabled( ) ) {
00033 ompPGSReduce<T>( this->bodyFAcc.getHostBuffer( ),
00034 this->bodyTAcc.getHostBuffer( ),
00035 this->bodyFAccReduction.getHostBuffer( ),
00036 this->bodyTAccReduction.getHostBuffer( ),
00037 this->reduceStrategy_ );
00038 }
00039 }
00040
00041 template<typename T>
00042 void OpenMPPGSSolver<T>::loadConstraints( )
00043 {
00044 ParallelPGSSolver<T,T,ParallelTypes::OpenMP>::loadConstraints( );
00045
00046
00047 if( this->reduceEnabled( ) ) {
00048 ompZeroVector<Vec4T>(this->bodyFAccReduction.getHostBuffer( ), this->bodyFAccReduction.getSize( ));
00049 ompZeroVector<Vec4T>(this->bodyTAccReduction.getHostBuffer( ), this->bodyTAccReduction.getSize( ));
00050 }
00051 }
00052
00053 }