// Ask the CUDA runtime how many devices it can see. The status code is kept
// so that the specific failure codes below can be told apart.
11 cudaError_t
error = cudaGetDeviceCount( &count );
// Tests for cudaErrorInsufficientDriver.
// NOTE(review): the statement guarded by this test is not visible in this excerpt.
13 if (
error == cudaErrorInsufficientDriver)
// Tests for cudaErrorNoDevice.
// NOTE(review): the statement guarded by this test is not visible in this excerpt.
16 if (
error == cudaErrorNoDevice)
// True when the device's major compute capability is below 2 (i.e. 1.x).
43 return prop.major < 2;
48 template <
class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute,
int device)
51 CUresult
error = cuDeviceGetAttribute( attribute, device_attribute, device );
52 if( CUDA_SUCCESS ==
error )
55 printf(
"Driver API error = %04d\n",
error);
/**
 * Maps a compute capability to the number of CUDA cores per multiprocessor.
 *
 * @param major  Major compute capability (the "M" of M.m).
 * @param minor  Minor compute capability (the "m" of M.m).
 * @return Cores per multiprocessor for a known architecture. For an unknown
 *         architecture a diagnostic is printed and 0 is returned.
 */
inline int convertSMVer2Cores(int major, int minor)
{
    // SM is stored as 0xMm: major version in the high nibble, minor in the low.
    struct SMtoCores
    {
        int SM;
        int Cores;
    };

    // Terminated by the { -1, -1 } sentinel.
    static const SMtoCores gpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },  // Tesla
        { 0x20,  32 }, { 0x21,  48 },                                // Fermi
        { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },  // Kepler
        { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },                 // Maxwell
        { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },                 // Pascal
        { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },                 // Volta / Turing
        { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },  // Ampere / Ada
        { 0x90, 128 },                                               // Hopper
        { -1, -1 }
    };

    const int sm = (major << 4) + minor;

    for (int index = 0; gpuArchCoresPerSM[index].SM != -1; ++index)
    {
        if (gpuArchCoresPerSM[index].SM == sm)
            return gpuArchCoresPerSM[index].Cores;
    }

    printf("\nCan't determine number of cores. Unknown SM version %d.%d!\n", major, minor);
    return 0;
}
// A device index inside [0, count) selects that single device; any other
// value selects every device.
84 bool valid = (device >= 0) && (device < count);
// Half-open range [beg, end) of device indices that will be reported.
86 int beg = valid ? device : 0;
87 int end = valid ? device+1 : count;
// Banner followed by the total device count.
89 printf(
"*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
90 printf(
"Device count: %d\n", count);
// Both versions start at 0.
// NOTE(review): the calls that fill them in are not visible in this excerpt.
92 int driverVersion = 0, runtimeVersion = 0;
// Human-readable descriptions looked up with prop.computeMode further down.
// NOTE(review): the end of this initializer is not visible in this excerpt.
96 const char *computeMode[] = {
97 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
98 "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
99 "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
100 "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
// Visit each selected device index in turn.
105 for(
int dev = beg; dev < end; ++dev)
110 int sm_cores = convertSMVer2Cores(prop.major, prop.minor);
112 printf(
"\nDevice %d: \"%s\"\n", dev, prop.name);
113 printf(
" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
114 printf(
" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
115 printf(
" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (
float)prop.totalGlobalMem/1048576.0f, (
unsigned long long) prop.totalGlobalMem);
116 printf(
" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, sm_cores, sm_cores * prop.multiProcessorCount);
117 printf(
" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
// Extra attributes, compiled only for CUDA runtime 4.0 or newer.
// NOTE(review): the matching #endif is not visible in this excerpt.
119 #if (CUDART_VERSION >= 4000)
// Filled in by getCudaAttribute below; declared without initial values.
121 int memoryClock, memBusWidth, L2CacheSize;
122 getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
123 getCudaAttribute<int>( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev );
124 getCudaAttribute<int>( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev );
// The raw clock value is scaled by 1e-3 before being labelled as megahertz
// (presumably the attribute is reported in kHz; confirm against the driver API docs).
126 printf(
"  Memory Clock rate:                             %.2f Mhz\n", memoryClock * 1e-3f);
127 printf(
"  Memory Bus Width:                              %d-bit\n", memBusWidth);
129 printf(
"  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
// Maximum texture extents for 1D, 2D and 3D textures.
131 printf(
"  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
132 prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
133 prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
// Layered textures: the last element of each array is printed as the layer count.
134 printf(
"  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, 2D=(%d,%d) x %d\n",
135 prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
136 prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
138 printf(
" Total amount of constant memory: %u bytes\n", (
int)prop.totalConstMem);
139 printf(
" Total amount of shared memory per block: %u bytes\n", (
int)prop.sharedMemPerBlock);
140 printf(
" Total number of registers available per block: %d\n", prop.regsPerBlock);
141 printf(
" Warp size: %d\n", prop.warpSize);
142 printf(
" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
143 printf(
" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
144 printf(
" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
145 printf(
" Maximum memory pitch: %u bytes\n", (
int)prop.memPitch);
146 printf(
" Texture alignment: %u bytes\n", (
int)prop.textureAlignment);
// With CUDA runtime 4.0 or newer the copy-engine count is reported as well.
148 #if CUDART_VERSION >= 4000
149 printf(
"  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ?
"Yes" :
"No"), prop.asyncEngineCount);
// Same report without the copy-engine count.
// NOTE(review): presumably the alternative for older runtimes; the #else/#endif
// separating the two variants are not visible in this excerpt.
151 printf(
"  Concurrent copy and execution:                 %s\n", prop.deviceOverlap ?
"Yes" :
"No");
// Each of the following flags is reported as Yes when the property is non-zero.
153 printf(
"  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ?
"Yes" :
"No");
154 printf(
"  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ?
"Yes" :
"No");
155 printf(
"  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ?
"Yes" :
"No");
157 printf(
"  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ?
"Yes" :
"No");
// NOTE(review): surfaceAlignment is only tested for being non-zero here; its
// numeric value is not printed.
158 printf(
"  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ?
"Yes" :
"No");
159 printf(
"  Device has ECC support enabled:                %s\n", prop.ECCEnabled ?
"Yes" :
"No");
160 printf(
"  Device is using TCC driver mode:               %s\n", prop.tccDriver ?
"Yes" :
"No");
// Unified addressing and PCI location are reported for CUDA runtime 4.0 or newer.
// NOTE(review): the matching #endif is not visible in this excerpt.
161 #if CUDART_VERSION >= 4000
162 printf(
"  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ?
"Yes" :
"No");
163 printf(
"  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
// prop.computeMode is used directly as an index into the computeMode table.
// NOTE(review): there is no range check on the index; confirm the table covers
// every value the runtime can report.
165 printf(
"  Compute Mode:\n");
166 printf(
"      %s \n", computeMode[prop.computeMode]);
170 printf(
"deviceQuery, CUDA Driver = CUDART");
171 printf(
", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
172 printf(
", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
173 printf(
", NumDevs = %d\n\n", count);
// A device index inside [0, count) selects that single device; any other
// value selects every device.
180 bool valid = (device >= 0) && (device < count);
// Half-open range [beg, end) of device indices that will be reported.
182 int beg = valid ? device : 0;
183 int end = valid ? device+1 : count;
// Both versions start at 0.
// NOTE(review): the calls that fill them in are not visible in this excerpt.
185 int driverVersion = 0, runtimeVersion = 0;
// Visit each selected device index in turn.
189 for(
int dev = beg; dev < end; ++dev)
194 const char *arch_str = prop.major < 2 ?
" (pre-Fermi)" :
"";
195 printf(
"Device %d: \"%s\" %.0fMb", dev, prop.name, (
float)prop.totalGlobalMem/1048576.0f);
196 printf(
", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
197 printf(
", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
// Remember the current tick count as the start of the measured interval.
204 start = (double)cv::getTickCount();
// Add the elapsed time reported by getTime() to the running total.
209 time_ms_ += getTime ();
// Report only when i_ is a non-zero multiple of EACH.
210 if (i_ % EACH == 0 && i_)
// Prints the mean time per frame over EACH frames and the matching frame rate.
212 std::cout <<
"Average frame time = " << time_ms_ / EACH <<
"ms ( " << 1000.f * EACH / time_ms_ <<
"fps )" << std::endl;
// Elapsed time since `start`, converted from ticks to milliseconds.
220 return ((
double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
// Remember the current tick count as the start of the measured interval.
225 start = (double)cv::getTickCount();
// Elapsed time since `start` in milliseconds, printed together with `name`.
229 double time_ms = ((double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
230 std::cout <<
"Time(" << name <<
") = " << time_ms <<
"ms" << std::endl;
// Elapsed time since `start`, converted from ticks to milliseconds.
235 return ((
double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
// ICP iteration counts copied into p.icp_iter_num below; `levels` is the
// number of entries in the array.
240 const int iters[] = {10, 5, 4, 0};
241 const int levels =
sizeof(iters)/
sizeof(iters[0]);
// Intrinsics built from 575.816 (twice) and the image centre shifted by half a pixel.
// NOTE(review): presumably the argument order is (fx, fy, cx, cy), and
// p.cols / p.rows are set outside this excerpt; confirm both.
247 p.intr =
Intr(575.816f, 575.816f,
p.cols/2 - 0.5f,
p.rows/2 - 0.5f);
// NOTE(review): units of the distances below are not stated here (presumably metres).
249 p.shifting_distance = 0.5f;
250 p.distance_camera_target = 1.4;
// Volume resolution of 512 along every axis and an extent of 3 along every axis.
252 p.volume_dims = Vec3i::all(512);
253 p.volume_size = Vec3f::all(3.f);
// Volume pose: a translation by minus half the volume size on each axis.
254 p.volume_pose =
Affine3f().translate(
Vec3f(-
p.volume_size[0]/2, -
p.volume_size[1]/2, -
p.volume_size[2]/2 ));
// Bilateral filter settings.
256 p.bilateral_sigma_depth = 0.04f;
257 p.bilateral_sigma_spatial = 4.5;
258 p.bilateral_kernel_size = 7;
// ICP settings.
260 p.icp_truncate_depth_dist = 0.f;
261 p.icp_dist_thres = 0.1f;
// Copies all `levels` entries of iters into the per-level iteration list.
263 p.icp_iter_num.assign(iters, iters + levels);
// TSDF settings.
265 p.tsdf_min_camera_movement = 0.f;
266 p.tsdf_trunc_dist = 0.04f;
267 p.tsdf_max_weight = 64;
// Raycasting and gradient settings.
269 p.raycast_step_factor = 0.75f;
270 p.gradient_delta_factor = 0.5f;
// Light pose at the origin and no command-line options attached.
273 p.light_pose = Vec3f::all(0.f);
274 p.cmd_options =
NULL;