00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00021
00022 #if defined(CUDA_SIFTGPU_ENABLED)
00023
00024 #include "GL/glew.h"
00025 #include <iostream>
00026 #include <vector>
00027 #include <algorithm>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 using namespace std;
00031
00032 #include <cuda.h>
00033 #include <cuda_runtime_api.h>
00034 #include <cuda_gl_interop.h>
00035
00036 #include "GlobalUtil.h"
00037 #include "GLTexImage.h"
00038 #include "CuTexImage.h"
00039 #include "ProgramCU.h"
00040
00041 #if CUDA_VERSION <= 2010 && defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
00042 #error "Require CUDA 2.2 or higher"
00043 #endif
00044
00045
00046 CuTexImage::CuTexImage()
00047 {
00048 _cuData = NULL;
00049 _cuData2D = NULL;
00050 _fromPBO = 0;
00051 _numChannel = _numBytes = 0;
00052 _imgWidth = _imgHeight = _texWidth = _texHeight = 0;
00053 }
00054
00055 CuTexImage::CuTexImage(int width, int height, int nchannel, GLuint pbo)
00056 {
00057 _cuData = NULL;
00058
00059
00060 GLint bsize, esize = width * height * nchannel * sizeof(float);
00061 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
00062 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00063 if(bsize < esize)
00064 {
00065 glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize, NULL, GL_STATIC_DRAW_ARB);
00066 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00067 }
00068 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
00069 if(bsize >=esize)
00070 {
00071
00072 cudaGLRegisterBufferObject(pbo);
00073 cudaGLMapBufferObject(&_cuData, pbo);
00074 ProgramCU::CheckErrorCUDA("cudaGLMapBufferObject");
00075 _fromPBO = pbo;
00076 }else
00077 {
00078 _cuData = NULL;
00079 _fromPBO = 0;
00080 }
00081 if(_cuData)
00082 {
00083 _numBytes = bsize;
00084 _imgWidth = width;
00085 _imgHeight = height;
00086 _numChannel = nchannel;
00087 }else
00088 {
00089 _numBytes = 0;
00090 _imgWidth = 0;
00091 _imgHeight = 0;
00092 _numChannel = 0;
00093 }
00094
00095 _texWidth = _texHeight =0;
00096
00097 _cuData2D = NULL;
00098 }
00099
00100 CuTexImage::~CuTexImage()
00101 {
00102
00103
00104 if(_fromPBO)
00105 {
00106 cudaGLUnmapBufferObject(_fromPBO);
00107 cudaGLUnregisterBufferObject(_fromPBO);
00108 }else if(_cuData)
00109 {
00110 cudaFree(_cuData);
00111 }
00112 if(_cuData2D) cudaFreeArray(_cuData2D);
00113 }
00114
00115 void CuTexImage::SetImageSize(int width, int height)
00116 {
00117 _imgWidth = width;
00118 _imgHeight = height;
00119 }
00120
00121 void CuTexImage::InitTexture(int width, int height, int nchannel)
00122 {
00123 int size;
00124 _imgWidth = width;
00125 _imgHeight = height;
00126 _numChannel = min(max(nchannel, 1), 4);
00127
00128 size = width * height * _numChannel * sizeof(float);
00129
00130 if(size <= _numBytes) return;
00131
00132 if(_cuData) cudaFree(_cuData);
00133
00134
00135 cudaMalloc(&_cuData, _numBytes = size);
00136
00137 #ifdef _DEBUG
00138 ProgramCU::CheckErrorCUDA("CuTexImage::InitTexture");
00139 #endif
00140 }
00141
00142 void CuTexImage::CopyFromHost(const void * buf)
00143 {
00144 if(_cuData == NULL) return;
00145 cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyHostToDevice);
00146 }
00147
00148 void CuTexImage::CopyToHost(void * buf)
00149 {
00150 if(_cuData == NULL) return;
00151 cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost);
00152 }
00153
00154 void CuTexImage::CopyToHost(void * buf, int stream)
00155 {
00156 if(_cuData == NULL) return;
00157 cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost, (cudaStream_t)stream);
00158 }
00159
00160 void CuTexImage::InitTexture2D()
00161 {
00162 #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
00163 if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight))
00164 {
00165 cudaFreeArray(_cuData2D);
00166 _cuData2D = NULL;
00167 }
00168
00169 if(_cuData2D == NULL)
00170 {
00171 _texWidth = max(_texWidth, _imgWidth);
00172 _texHeight = max(_texHeight, _imgHeight);
00173 cudaChannelFormatDesc desc;
00174 desc.f = cudaChannelFormatKindFloat;
00175 desc.x = sizeof(float) * 8;
00176 desc.y = _numChannel >=2 ? sizeof(float) * 8 : 0;
00177 desc.z = _numChannel >=3 ? sizeof(float) * 8 : 0;
00178 desc.w = _numChannel >=4 ? sizeof(float) * 8 : 0;
00179 cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight);
00180 ProgramCU::CheckErrorCUDA("cudaMallocArray");
00181 }
00182 #endif
00183 }
00184
00185 void CuTexImage::CopyToTexture2D()
00186 {
00187 #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
00188 InitTexture2D();
00189
00190 if(_cuData2D)
00191 {
00192 cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel* sizeof(float) ,
00193 _imgWidth * _numChannel*sizeof(float), _imgHeight, cudaMemcpyDeviceToDevice);
00194 ProgramCU::CheckErrorCUDA("cudaMemcpy2DToArray");
00195 }
00196 #endif
00197
00198 }
00199
00200 int CuTexImage::DebugCopyToTexture2D()
00201 {
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212 return 1;
00213 }
00214
00215
00216
00217 void CuTexImage::CopyFromPBO(int width, int height, GLuint pbo)
00218 {
00219 void* pbuf =NULL;
00220 GLint esize = width * height * sizeof(float);
00221 cudaGLRegisterBufferObject(pbo);
00222 cudaGLMapBufferObject(&pbuf, pbo);
00223
00224 cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice);
00225
00226 cudaGLUnmapBufferObject(pbo);
00227 cudaGLUnregisterBufferObject(pbo);
00228 }
00229
00230 int CuTexImage::CopyToPBO(GLuint pbo)
00231 {
00232 void* pbuf =NULL;
00233 GLint bsize, esize = _imgWidth * _imgHeight * sizeof(float) * _numChannel;
00234 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo);
00235 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00236 if(bsize < esize)
00237 {
00238 glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2, NULL, GL_STATIC_DRAW_ARB);
00239 glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
00240 }
00241 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
00242
00243 if(bsize >= esize)
00244 {
00245 cudaGLRegisterBufferObject(pbo);
00246 cudaGLMapBufferObject(&pbuf, pbo);
00247 cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice);
00248 cudaGLUnmapBufferObject(pbo);
00249 cudaGLUnregisterBufferObject(pbo);
00250 return 1;
00251 }else
00252 {
00253 return 0;
00254 }
00255 }
00256
00257 #endif
00258