parallel_memory.h
Go to the documentation of this file.
00001 #ifndef PARALLEL_MEMORY_H
00002 #define PARALLEL_MEMORY_H
00003 
00004 #include "parallel_common.h"
00005 
00006 namespace parallel_utils
00007 {
00008 
00009 namespace CopyTypes
00010 {
00011   enum CopyType { COPY_SYNC, COPY_ASYNC, COPY_DEFAULT = COPY_ASYNC };
00012 }
00013 typedef CopyTypes::CopyType CopyType;
00014 
00015 template<typename T>
00016 static T offsetBuffer(T buffer, ArraySize offset) {
00017     return buffer + offset;
00018 }
00019 
00020 template <typename T, ParallelType PT> struct MemManager {
00021 
00022   typedef T* host_type;
00023   typedef T* device_type;
00024   typedef void** device_init_type;
00025   typedef int stream_type;
00026   typedef unsigned int mem_flags;
00027 
00028   inline static void allocateHost( void** ptr, size_t size, int flags ) {
00029     *ptr = malloc( size );
00030   }
00031 
00032   inline static void freeHost( void* ptr ) {
00033     free( ptr );
00034   }
00035 
00036   inline static void allocateDevice( void** ptr, size_t size ) {
00037     *ptr = NULL;
00038   }
00039 
00040   inline static void freeDevice( void* ptr ) {
00041 
00042   }
00043 
00044   inline static void copyToHost(void* dst, void* src, size_t size, CopyType copyType, stream_type stream ) {
00045 
00046   }
00047 
00048   inline static void copyToDevice(void* dst, void* src, size_t size, CopyType copyType, stream_type stream ) {
00049 
00050   }
00051 
00052 };
00053 
00054 #ifdef USE_CUDA
00055 
00056 template <typename T>               struct MemManager<T,ParallelTypes::CUDA> {
00057 
00058   typedef T* host_type;
00059   typedef T* device_type;
00060   typedef void** device_init_type;
00061   typedef cudaStream_t stream_type;
00062   typedef unsigned int mem_flags;
00063 
00064   const static mem_flags defaultAllocFlags = cudaHostAllocPortable;
00065 
00066   inline static void allocateHost( void** ptr, size_t size, int flags ) {
00067     cudaHostAlloc(ptr, size, flags );
00068   }
00069 
00070   inline static void freeHost( void* ptr ) {
00071     cudaFreeHost( ptr );
00072   }
00073 
00074   inline static void allocateDevice( void** ptr, size_t size ) {
00075     cudaMalloc(ptr, size);
00076   }
00077 
00078   inline static void freeDevice( void* ptr ) {
00079     cudaFree( ptr );
00080   }
00081 
00082   inline static void copyToHost(void* dst, void * src, size_t size, CopyType copyType, stream_type stream ) {
00083     if( CopyTypes::COPY_SYNC == copyType )
00084       cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
00085     else
00086       cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, stream);
00087   }
00088 
00089   inline static void copyToDevice(void* dst, void * src, size_t size, CopyType copyType, stream_type stream ) {
00090     if( CopyTypes::COPY_SYNC == copyType )
00091       cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
00092     else
00093       cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, stream);
00094   }
00095 };
00096 
00097 #elif USE_OPENCL
00098 
00099 // NOTE: If you don't have OpenCL 1.1, you're out of luck here =/
00100 template<>
00101 cl_mem offsetBuffer<cl_mem>(cl_mem buffer, ArraySize offset) {
00102 
00103   if( NULL == buffer || offset == 0 ) return buffer;
00104 
00105   size_t originalBufferSize;
00106   cl_int ciErrNum;
00107   ciErrNum = clGetMemObjectInfo( buffer,
00108                                  CL_MEM_SIZE,
00109                                  sizeof(size_t),
00110                                  &originalBufferSize,
00111                                  NULL );
00112   oclCheckError(ciErrNum, CL_SUCCESS);
00113 
00114   cl_mem_flags originalBufferMemflags;
00115   ciErrNum = clGetMemObjectInfo( buffer,
00116                                  CL_MEM_FLAGS,
00117                                  sizeof(cl_mem_flags),
00118                                  &originalBufferMemflags,
00119                                  NULL );
00120   oclCheckError(ciErrNum, CL_SUCCESS);
00121 
00122   cl_buffer_region subBufferRegion = { offset, originalBufferSize - offset };
00123   cl_mem subBuffer = clCreateSubBuffer( buffer,
00124                                         originalBufferMemflags,
00125                                         CL_BUFFER_CREATE_TYPE_REGION,
00126                                         &subBufferRegion,
00127                                         &ciErrNum );
00128   oclCheckError(ciErrNum, CL_SUCCESS);
00129 
00130   return subBuffer;
00131 }
00132 
00133 template <typename T>              struct MemManager<T, ParallelType::OpenCL> {
00134 
00135   typedef T* host_type;
00136   typedef cl_mem device_type;
00137   typedef cl_mem* device_init_type;
00138   typedef int stream_type;
00139   typedef cl_mem_flags mem_flags;
00140 
00141   inline static void allocateHost( void** ptr, size_t size, int flags ) {
00142     *ptr = malloc( size );
00143   }
00144 
00145   inline static void freeHost( void* ptr ) {
00146     if( ptr ) free( ptr );
00147   }
00148 
00149   inline static void allocateDevice( cl_mem* ptr, size_t size ) {
00150     cl_int ciErrNum;
00151     *ptr = clCreateBuffer( dxGetDeviceContext(), CL_MEM_READ_WRITE, size, NULL, &ciErrNum );
00152     oclCheckError(ciErrNum, CL_SUCCESS);
00153   }
00154 
00155   inline static void freeDevice( void* ptr ) {
00156     if( ptr ) {
00157       cl_int ciErrNum;
00158       ciErrNum = clReleaseMemObject( ((device_type)ptr) );
00159       oclCheckError(ciErrNum, CL_SUCCESS);
00160     }
00161   }
00162 
00163   inline static void copyToHost(void* dst, cl_mem src, size_t size, CopyType copyType, stream_type stream ) {
00164       cl_int ciErrNum;
00165       ciErrNum = clEnqueueReadBuffer(dxGetDeviceQueue(),
00166                                      src,
00167                                      (CopyTypes::COPY_SYNC == copyType ),
00168                                      0,
00169                                      size,
00170                                      dst,
00171                                      0,
00172                                      NULL,
00173                                      NULL);
00174       oclCheckError(ciErrNum, CL_SUCCESS);
00175   }
00176 
00177   inline static void copyToDevice(cl_mem dst, void* src, size_t size, CopyType copyType, stream_type stream ) {
00178       cl_int ciErrNum;
00179       ciErrNum = clEnqueueWriteBuffer(dxGetDeviceQueue(),
00180                                       dst,
00181                                       (CopyTypes::COPY_SYNC == copyType ),
00182                                       0,
00183                                       size,
00184                                       src,
00185                                       0,
00186                                       NULL,
00187                                       NULL);
00188       oclCheckError(ciErrNum, CL_SUCCESS);
00189   }
00190 };
00191 
00192 #endif
00193 
00194 }
00195 
00196 #endif
00197 


parallel_quickstep
Author(s): Jared Duke
autogenerated on Fri Jan 3 2014 11:36:56