11 cudaError_t
error = cudaGetDeviceCount( &count );
13 if (error == cudaErrorInsufficientDriver)
16 if (error == cudaErrorNoDevice)
43 return prop.major < 2;
48 template <
class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute,
int device)
51 CUresult error = cuDeviceGetAttribute( attribute, device_attribute, device );
52 if( CUDA_SUCCESS == error )
55 printf(
"Driver API error = %04d\n", error);
59 inline int convertSMVer2Cores(
int major,
int minor)
67 SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
70 while (gpuArchCoresPerSM[index].SM != -1)
72 if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
73 return gpuArchCoresPerSM[index].Cores;
76 printf(
"\nCan't determine number of cores. Unknown SM version %d.%d!\n", major, minor);
84 bool valid = (device >= 0) && (device < count);
86 int beg = valid ? device : 0;
87 int end = valid ? device+1 : count;
89 printf(
"*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
90 printf(
"Device count: %d\n", count);
92 int driverVersion = 0, runtimeVersion = 0;
96 const char *computeMode[] = {
97 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
98 "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
99 "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
100 "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
105 for(
int dev = beg; dev < end; ++dev)
110 int sm_cores = convertSMVer2Cores(prop.major, prop.minor);
112 printf(
"\nDevice %d: \"%s\"\n", dev, prop.name);
113 printf(
" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
114 printf(
" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
115 printf(
" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (
float)prop.totalGlobalMem/1048576.0f, (
unsigned long long) prop.totalGlobalMem);
116 printf(
" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, sm_cores, sm_cores * prop.multiProcessorCount);
117 printf(
" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
119 #if (CUDART_VERSION >= 4000) 121 int memoryClock, memBusWidth, L2CacheSize;
122 getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
123 getCudaAttribute<int>( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev );
124 getCudaAttribute<int>( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev );
126 printf(
" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f);
127 printf(
" Memory Bus Width: %d-bit\n", memBusWidth);
129 printf(
" L2 Cache Size: %d bytes\n", L2CacheSize);
131 printf(
" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
132 prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
133 prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
134 printf(
" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
135 prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
136 prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
138 printf(
" Total amount of constant memory: %u bytes\n", (
int)prop.totalConstMem);
139 printf(
" Total amount of shared memory per block: %u bytes\n", (
int)prop.sharedMemPerBlock);
140 printf(
" Total number of registers available per block: %d\n", prop.regsPerBlock);
141 printf(
" Warp size: %d\n", prop.warpSize);
142 printf(
" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
143 printf(
" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
144 printf(
" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
145 printf(
" Maximum memory pitch: %u bytes\n", (
int)prop.memPitch);
146 printf(
" Texture alignment: %u bytes\n", (
int)prop.textureAlignment);
148 #if CUDART_VERSION >= 4000 149 printf(
" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ?
"Yes" :
"No"), prop.asyncEngineCount);
151 printf(
" Concurrent copy and execution: %s\n", prop.deviceOverlap ?
"Yes" :
"No");
153 printf(
" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ?
"Yes" :
"No");
154 printf(
" Integrated GPU sharing Host Memory: %s\n", prop.integrated ?
"Yes" :
"No");
155 printf(
" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ?
"Yes" :
"No");
157 printf(
" Concurrent kernel execution: %s\n", prop.concurrentKernels ?
"Yes" :
"No");
158 printf(
" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ?
"Yes" :
"No");
159 printf(
" Device has ECC support enabled: %s\n", prop.ECCEnabled ?
"Yes" :
"No");
160 printf(
" Device is using TCC driver mode: %s\n", prop.tccDriver ?
"Yes" :
"No");
161 #if CUDART_VERSION >= 4000 162 printf(
" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ?
"Yes" :
"No");
163 printf(
" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
165 printf(
" Compute Mode:\n");
166 printf(
" %s \n", computeMode[prop.computeMode]);
170 printf(
"deviceQuery, CUDA Driver = CUDART");
171 printf(
", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
172 printf(
", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
173 printf(
", NumDevs = %d\n\n", count);
180 bool valid = (device >= 0) && (device < count);
182 int beg = valid ? device : 0;
183 int end = valid ? device+1 : count;
185 int driverVersion = 0, runtimeVersion = 0;
189 for(
int dev = beg; dev < end; ++dev)
194 const char *arch_str = prop.major < 2 ?
" (pre-Fermi)" :
"";
195 printf(
"Device %d: \"%s\" %.0fMb", dev, prop.name, (
float)prop.totalGlobalMem/1048576.0f);
196 printf(
", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
197 printf(
", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
204 start = (double)cv::getTickCount();
210 if (i_ %
EACH == 0 && i_)
212 std::cout <<
"Average frame time = " <<
time_ms_ /
EACH <<
"ms ( " << 1000.f *
EACH /
time_ms_ <<
"fps )" << std::endl;
220 return ((
double)cv::getTickCount() -
start)*1000.0/cv::getTickFrequency();
225 start = (double)cv::getTickCount();
229 double time_ms = ((double)cv::getTickCount() -
start)*1000.0/cv::getTickFrequency();
230 std::cout <<
"Time(" <<
name <<
") = " << time_ms <<
"ms" << std::endl;
235 return ((
double)cv::getTickCount() -
start)*1000.0/cv::getTickFrequency();
240 const int iters[] = {10, 5, 4, 0};
241 const int levels =
sizeof(iters)/
sizeof(iters[0]);
float icp_truncate_depth_dist
KF_EXPORTS bool checkIfPreFermiGPU(int device)
KF_EXPORTS int getCudaEnabledDeviceCount()
float bilateral_sigma_depth
static KinFuParams default_params()
KF_EXPORTS void printShortCudaDeviceInfo(int device)
#define cudaSafeCall(expr)
double distance_camera_target
KF_EXPORTS std::string getDeviceName(int device)
float tsdf_min_camera_movement
float raycast_step_factor
int bilateral_kernel_size
KF_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func="")
Error handler. All GPU functions from this subsystem call the function to report an error...
ScopeTime(const char *name)
float gradient_delta_factor
float deg2rad(float alpha)
KF_EXPORTS void printCudaDeviceInfo(int device)
float bilateral_sigma_spatial
SampledScopeTime(double &time_ms)
std::vector< int > icp_iter_num
KF_EXPORTS void setDevice(int device)