00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00021
00022 #if defined(CUDA_SIFTGPU_ENABLED)
00023
00024 #include "GL/glew.h"
00025 #include <iostream>
00026 #include <vector>
00027 #include <algorithm>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 using namespace std;
00031
00032
00033 #include <cuda.h>
00034 #include <cuda_runtime_api.h>
00035 #include <cuda_gl_interop.h>
00036
00037 #include "GlobalUtil.h"
00038 #include "GLTexImage.h"
00039 #include "CuTexImage.h"
00040 #include "ProgramCU.h"
00041
00042 #if CUDA_VERSION <= 2010 && defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
00043 #error "Require CUDA 2.2 or higher"
00044 #endif
00045
00046
00047 CuTexImage::CuTexImage()
00048 {
00049 _cuData = NULL;
00050 _cuData2D = NULL;
00051 _fromPBO = 0;
00052 _numChannel = _numBytes = 0;
00053 _imgWidth = _imgHeight = _texWidth = _texHeight = 0;
00054 }
00055
00056 CuTexImage::CuTexImage(int width, int height, int nchannel, GLuint pbo)
00057 {
00058 _cuData = NULL;
00059
00060
00061 GLint bsize, esize = width * height * nchannel * sizeof(float);
00062 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
00063 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00064 if(bsize < esize)
00065 {
00066 glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize, NULL, GL_STATIC_DRAW_ARB);
00067 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00068 }
00069 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
00070 if(bsize >=esize)
00071 {
00072
00073 cudaGLRegisterBufferObject(pbo);
00074 cudaGLMapBufferObject(&_cuData, pbo);
00075 ProgramCU::CheckErrorCUDA("cudaGLMapBufferObject");
00076 _fromPBO = pbo;
00077 }else
00078 {
00079 _cuData = NULL;
00080 _fromPBO = 0;
00081 }
00082 if(_cuData)
00083 {
00084 _numBytes = bsize;
00085 _imgWidth = width;
00086 _imgHeight = height;
00087 _numChannel = nchannel;
00088 }else
00089 {
00090 _numBytes = 0;
00091 _imgWidth = 0;
00092 _imgHeight = 0;
00093 _numChannel = 0;
00094 }
00095
00096 _texWidth = _texHeight =0;
00097
00098 _cuData2D = NULL;
00099 }
00100
00101 CuTexImage::~CuTexImage()
00102 {
00103
00104
00105 if(_fromPBO)
00106 {
00107 cudaGLUnmapBufferObject(_fromPBO);
00108 cudaGLUnregisterBufferObject(_fromPBO);
00109 }else if(_cuData)
00110 {
00111 cudaFree(_cuData);
00112 }
00113 if(_cuData2D) cudaFreeArray(_cuData2D);
00114 }
00115
00116 void CuTexImage::SetImageSize(int width, int height)
00117 {
00118 _imgWidth = width;
00119 _imgHeight = height;
00120 }
00121
00122 void CuTexImage::InitTexture(int width, int height, int nchannel)
00123 {
00124 int size;
00125 _imgWidth = width;
00126 _imgHeight = height;
00127 _numChannel = min(max(nchannel, 1), 4);
00128
00129 size = width * height * _numChannel * sizeof(float);
00130
00131 if(size <= _numBytes) return;
00132
00133 if(_cuData) cudaFree(_cuData);
00134
00135
00136 cudaMalloc(&_cuData, _numBytes = size);
00137
00138 #ifdef _DEBUG
00139 ProgramCU::CheckErrorCUDA("CuTexImage::InitTexture");
00140 #endif
00141 }
00142
00143 void CuTexImage::CopyFromHost(const void * buf)
00144 {
00145 if(_cuData == NULL) return;
00146 cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyHostToDevice);
00147 }
00148
00149 void CuTexImage::CopyToHost(void * buf)
00150 {
00151 if(_cuData == NULL) return;
00152 cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost);
00153 }
00154
00155 void CuTexImage::CopyToHost(void * buf, int stream)
00156 {
00157 if(_cuData == NULL) return;
00158 cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost, (cudaStream_t)stream);
00159 }
00160
00161 void CuTexImage::InitTexture2D()
00162 {
00163 #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
00164 if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight))
00165 {
00166 cudaFreeArray(_cuData2D);
00167 _cuData2D = NULL;
00168 }
00169
00170 if(_cuData2D == NULL)
00171 {
00172 _texWidth = max(_texWidth, _imgWidth);
00173 _texHeight = max(_texHeight, _imgHeight);
00174 cudaChannelFormatDesc desc;
00175 desc.f = cudaChannelFormatKindFloat;
00176 desc.x = sizeof(float) * 8;
00177 desc.y = _numChannel >=2 ? sizeof(float) * 8 : 0;
00178 desc.z = _numChannel >=3 ? sizeof(float) * 8 : 0;
00179 desc.w = _numChannel >=4 ? sizeof(float) * 8 : 0;
00180 cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight);
00181 ProgramCU::CheckErrorCUDA("cudaMallocArray");
00182 }
00183 #endif
00184 }
00185
00186 void CuTexImage::CopyToTexture2D()
00187 {
00188 #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
00189 InitTexture2D();
00190
00191 if(_cuData2D)
00192 {
00193 cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel* sizeof(float) ,
00194 _imgWidth * _numChannel*sizeof(float), _imgHeight, cudaMemcpyDeviceToDevice);
00195 ProgramCU::CheckErrorCUDA("cudaMemcpy2DToArray");
00196 }
00197 #endif
00198
00199 }
00200
00201 int CuTexImage::DebugCopyToTexture2D()
00202 {
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213 return 1;
00214 }
00215
00216
00217
00218 void CuTexImage::CopyFromPBO(int width, int height, GLuint pbo)
00219 {
00220 void* pbuf =NULL;
00221 GLint esize = width * height * sizeof(float);
00222 cudaGLRegisterBufferObject(pbo);
00223 cudaGLMapBufferObject(&pbuf, pbo);
00224
00225 cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice);
00226
00227 cudaGLUnmapBufferObject(pbo);
00228 cudaGLUnregisterBufferObject(pbo);
00229 }
00230
00231 int CuTexImage::CopyToPBO(GLuint pbo)
00232 {
00233 void* pbuf =NULL;
00234 GLint bsize, esize = _imgWidth * _imgHeight * sizeof(float) * _numChannel;
00235 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
00236 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00237 if(bsize < esize)
00238 {
00239 glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2, NULL, GL_STATIC_DRAW_ARB);
00240 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00241 }
00242 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
00243
00244 if(bsize >= esize)
00245 {
00246 cudaGLRegisterBufferObject(pbo);
00247 cudaGLMapBufferObject(&pbuf, pbo);
00248 cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice);
00249 cudaGLUnmapBufferObject(pbo);
00250 cudaGLUnregisterBufferObject(pbo);
00251 return 1;
00252 }else
00253 {
00254 return 0;
00255 }
00256 }
00257
00258 #endif
00259