00001 #ifndef PARALLEL_MEMORY_H
00002 #define PARALLEL_MEMORY_H
00003
00004 #include "parallel_common.h"
00005
00006 namespace parallel_utils
00007 {
00008
00009 namespace CopyTypes
00010 {
00011 enum CopyType { COPY_SYNC, COPY_ASYNC, COPY_DEFAULT = COPY_ASYNC };
00012 }
00013 typedef CopyTypes::CopyType CopyType;
00014
00015 template<typename T>
00016 static T offsetBuffer(T buffer, ArraySize offset) {
00017 return buffer + offset;
00018 }
00019
00020 template <typename T, ParallelType PT> struct MemManager {
00021
00022 typedef T* host_type;
00023 typedef T* device_type;
00024 typedef void** device_init_type;
00025 typedef int stream_type;
00026 typedef unsigned int mem_flags;
00027
00028 inline static void allocateHost( void** ptr, size_t size, int flags ) {
00029 *ptr = malloc( size );
00030 }
00031
00032 inline static void freeHost( void* ptr ) {
00033 free( ptr );
00034 }
00035
00036 inline static void allocateDevice( void** ptr, size_t size ) {
00037 *ptr = NULL;
00038 }
00039
00040 inline static void freeDevice( void* ptr ) {
00041
00042 }
00043
00044 inline static void copyToHost(void* dst, void* src, size_t size, CopyType copyType, stream_type stream ) {
00045
00046 }
00047
00048 inline static void copyToDevice(void* dst, void* src, size_t size, CopyType copyType, stream_type stream ) {
00049
00050 }
00051
00052 };
00053
00054 #ifdef USE_CUDA
00055
00056 template <typename T> struct MemManager<T,ParallelTypes::CUDA> {
00057
00058 typedef T* host_type;
00059 typedef T* device_type;
00060 typedef void** device_init_type;
00061 typedef cudaStream_t stream_type;
00062 typedef unsigned int mem_flags;
00063
00064 const static mem_flags defaultAllocFlags = cudaHostAllocPortable;
00065
00066 inline static void allocateHost( void** ptr, size_t size, int flags ) {
00067 cudaHostAlloc(ptr, size, flags );
00068 }
00069
00070 inline static void freeHost( void* ptr ) {
00071 cudaFreeHost( ptr );
00072 }
00073
00074 inline static void allocateDevice( void** ptr, size_t size ) {
00075 cudaMalloc(ptr, size);
00076 }
00077
00078 inline static void freeDevice( void* ptr ) {
00079 cudaFree( ptr );
00080 }
00081
00082 inline static void copyToHost(void* dst, void * src, size_t size, CopyType copyType, stream_type stream ) {
00083 if( CopyTypes::COPY_SYNC == copyType )
00084 cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
00085 else
00086 cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, stream);
00087 }
00088
00089 inline static void copyToDevice(void* dst, void * src, size_t size, CopyType copyType, stream_type stream ) {
00090 if( CopyTypes::COPY_SYNC == copyType )
00091 cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
00092 else
00093 cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, stream);
00094 }
00095 };
00096
00097 #elif USE_OPENCL
00098
00099
00100 template<>
00101 cl_mem offsetBuffer<cl_mem>(cl_mem buffer, ArraySize offset) {
00102
00103 if( NULL == buffer || offset == 0 ) return buffer;
00104
00105 size_t originalBufferSize;
00106 cl_int ciErrNum;
00107 ciErrNum = clGetMemObjectInfo( buffer,
00108 CL_MEM_SIZE,
00109 sizeof(size_t),
00110 &originalBufferSize,
00111 NULL );
00112 oclCheckError(ciErrNum, CL_SUCCESS);
00113
00114 cl_mem_flags originalBufferMemflags;
00115 ciErrNum = clGetMemObjectInfo( buffer,
00116 CL_MEM_FLAGS,
00117 sizeof(cl_mem_flags),
00118 &originalBufferMemflags,
00119 NULL );
00120 oclCheckError(ciErrNum, CL_SUCCESS);
00121
00122 cl_buffer_region subBufferRegion = { offset, originalBufferSize - offset };
00123 cl_mem subBuffer = clCreateSubBuffer( buffer,
00124 originalBufferMemflags,
00125 CL_BUFFER_CREATE_TYPE_REGION,
00126 &subBufferRegion,
00127 &ciErrNum );
00128 oclCheckError(ciErrNum, CL_SUCCESS);
00129
00130 return subBuffer;
00131 }
00132
00133 template <typename T> struct MemManager<T, ParallelType::OpenCL> {
00134
00135 typedef T* host_type;
00136 typedef cl_mem device_type;
00137 typedef cl_mem* device_init_type;
00138 typedef int stream_type;
00139 typedef cl_mem_flags mem_flags;
00140
00141 inline static void allocateHost( void** ptr, size_t size, int flags ) {
00142 *ptr = malloc( size );
00143 }
00144
00145 inline static void freeHost( void* ptr ) {
00146 if( ptr ) free( ptr );
00147 }
00148
00149 inline static void allocateDevice( cl_mem* ptr, size_t size ) {
00150 cl_int ciErrNum;
00151 *ptr = clCreateBuffer( dxGetDeviceContext(), CL_MEM_READ_WRITE, size, NULL, &ciErrNum );
00152 oclCheckError(ciErrNum, CL_SUCCESS);
00153 }
00154
00155 inline static void freeDevice( void* ptr ) {
00156 if( ptr ) {
00157 cl_int ciErrNum;
00158 ciErrNum = clReleaseMemObject( ((device_type)ptr) );
00159 oclCheckError(ciErrNum, CL_SUCCESS);
00160 }
00161 }
00162
00163 inline static void copyToHost(void* dst, cl_mem src, size_t size, CopyType copyType, stream_type stream ) {
00164 cl_int ciErrNum;
00165 ciErrNum = clEnqueueReadBuffer(dxGetDeviceQueue(),
00166 src,
00167 (CopyTypes::COPY_SYNC == copyType ),
00168 0,
00169 size,
00170 dst,
00171 0,
00172 NULL,
00173 NULL);
00174 oclCheckError(ciErrNum, CL_SUCCESS);
00175 }
00176
00177 inline static void copyToDevice(cl_mem dst, void* src, size_t size, CopyType copyType, stream_type stream ) {
00178 cl_int ciErrNum;
00179 ciErrNum = clEnqueueWriteBuffer(dxGetDeviceQueue(),
00180 dst,
00181 (CopyTypes::COPY_SYNC == copyType ),
00182 0,
00183 size,
00184 src,
00185 0,
00186 NULL,
00187 NULL);
00188 oclCheckError(ciErrNum, CL_SUCCESS);
00189 }
00190 };
00191
00192 #endif
00193
00194 }
00195
00196 #endif
00197