core.cpp
Go to the documentation of this file.
1 #include <kfusion/kinfu.hpp>
2 #include <kfusion/safe_call.hpp>
3 
4 #include <cuda.h>
5 #include <cstdio>
6 #include <iostream>
7 
9 {
10  int count;
11  cudaError_t error = cudaGetDeviceCount( &count );
12 
13  if (error == cudaErrorInsufficientDriver)
14  return -1;
15 
16  if (error == cudaErrorNoDevice)
17  return 0;
18 
20  return count;
21 }
22 
23 void kf::cuda::setDevice(int device)
24 {
25  cudaSafeCall( cudaSetDevice( device ) );
26 }
27 
28 std::string kf::cuda::getDeviceName(int device)
29 {
30  cudaDeviceProp prop;
31  cudaSafeCall( cudaGetDeviceProperties(&prop, device) );
32 
33  return prop.name;
34 }
35 
36 bool kf::cuda::checkIfPreFermiGPU(int device)
37 {
38  if (device < 0)
39  cudaSafeCall( cudaGetDevice(&device) );
40 
41  cudaDeviceProp prop;
42  cudaSafeCall( cudaGetDeviceProperties(&prop, device) );
43  return prop.major < 2; // CC == 1.x
44 }
45 
46 namespace
47 {
48  template <class T> inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
49  {
50  *attribute = T();
51  CUresult error = cuDeviceGetAttribute( attribute, device_attribute, device );
52  if( CUDA_SUCCESS == error )
53  return;
54 
55  printf("Driver API error = %04d\n", error);
56  kfusion::cuda::error("driver API error", __FILE__, __LINE__);
57  }
58 
59  inline int convertSMVer2Cores(int major, int minor)
60  {
61  // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
62  typedef struct {
63  int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
64  int Cores;
65  } SMtoCores;
66 
67  SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };
68 
69  int index = 0;
70  while (gpuArchCoresPerSM[index].SM != -1)
71  {
72  if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
73  return gpuArchCoresPerSM[index].Cores;
74  index++;
75  }
76  printf("\nCan't determine number of cores. Unknown SM version %d.%d!\n", major, minor);
77  return 0;
78  }
79 }
80 
81 void kf::cuda::printCudaDeviceInfo(int device)
82 {
83  int count = getCudaEnabledDeviceCount();
84  bool valid = (device >= 0) && (device < count);
85 
86  int beg = valid ? device : 0;
87  int end = valid ? device+1 : count;
88 
89  printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
90  printf("Device count: %d\n", count);
91 
92  int driverVersion = 0, runtimeVersion = 0;
93  cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
94  cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
95 
96  const char *computeMode[] = {
97  "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
98  "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
99  "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
100  "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
101  "Unknown",
102  NULL
103  };
104 
105  for(int dev = beg; dev < end; ++dev)
106  {
107  cudaDeviceProp prop;
108  cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
109 
110  int sm_cores = convertSMVer2Cores(prop.major, prop.minor);
111 
112  printf("\nDevice %d: \"%s\"\n", dev, prop.name);
113  printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
114  printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
115  printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
116  printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, sm_cores, sm_cores * prop.multiProcessorCount);
117  printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
118 
119 #if (CUDART_VERSION >= 4000)
120  // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
121  int memoryClock, memBusWidth, L2CacheSize;
122  getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
123  getCudaAttribute<int>( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev );
124  getCudaAttribute<int>( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev );
125 
126  printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f);
127  printf(" Memory Bus Width: %d-bit\n", memBusWidth);
128  if (L2CacheSize)
129  printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
130 
131  printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
132  prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
133  prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
134  printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
135  prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
136  prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
137 #endif
138  printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
139  printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
140  printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
141  printf(" Warp size: %d\n", prop.warpSize);
142  printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
143  printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
144  printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
145  printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
146  printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
147 
148 #if CUDART_VERSION >= 4000
149  printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
150 #else
151  printf(" Concurrent copy and execution: %s\n", prop.deviceOverlap ? "Yes" : "No");
152 #endif
153  printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
154  printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
155  printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
156 
157  printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No");
158  printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
159  printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
160  printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
161 #if CUDART_VERSION >= 4000
162  printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
163  printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
164 #endif
165  printf(" Compute Mode:\n");
166  printf(" %s \n", computeMode[prop.computeMode]);
167  }
168 
169  printf("\n");
170  printf("deviceQuery, CUDA Driver = CUDART");
171  printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
172  printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
173  printf(", NumDevs = %d\n\n", count);
174  fflush(stdout);
175 }
176 
177 void kf::cuda::printShortCudaDeviceInfo(int device)
178 {
179  int count = getCudaEnabledDeviceCount();
180  bool valid = (device >= 0) && (device < count);
181 
182  int beg = valid ? device : 0;
183  int end = valid ? device+1 : count;
184 
185  int driverVersion = 0, runtimeVersion = 0;
186  cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
187  cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
188 
189  for(int dev = beg; dev < end; ++dev)
190  {
191  cudaDeviceProp prop;
192  cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
193 
194  const char *arch_str = prop.major < 2 ? " (pre-Fermi)" : "";
195  printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
196  printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
197  printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
198  }
199  fflush(stdout);
200 }
201 
202 kf::SampledScopeTime::SampledScopeTime(double& time_ms) : time_ms_(time_ms)
203 {
204  start = (double)cv::getTickCount();
205 }
207 {
208  static int i_ = 0;
209  time_ms_ += getTime ();
210  if (i_ % EACH == 0 && i_)
211  {
212  std::cout << "Average frame time = " << time_ms_ / EACH << "ms ( " << 1000.f * EACH / time_ms_ << "fps )" << std::endl;
213  time_ms_ = 0.0;
214  }
215  ++i_;
216 }
217 
219 {
220  return ((double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
221 }
222 
223 kf::ScopeTime::ScopeTime(const char *name_) : name(name_)
224 {
225  start = (double)cv::getTickCount();
226 }
228 {
229  double time_ms = ((double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
230  std::cout << "Time(" << name << ") = " << time_ms << "ms" << std::endl;
231 }
232 
234 {
235  return ((double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
236 }
237 
239 {
240  const int iters[] = {10, 5, 4, 0};
241  const int levels = sizeof(iters)/sizeof(iters[0]);
242 
243  KinFuParams p;
244 
245  p.cols = 640; //pixels
246  p.rows = 480; //pixels
247  p.intr = Intr(575.816f, 575.816f, p.cols/2 - 0.5f, p.rows/2 - 0.5f);
248  //p.intr = Intr(543.285034f, 548.561951f, 319.980045f, 243.665918f);
249  p.shifting_distance = 0.5f; //meters to go before shifting the volume
250  p.distance_camera_target = 1.4;
251 
252  p.volume_dims = Vec3i::all(512); //number of voxels
253  p.volume_size = Vec3f::all(3.f); //meters
254  p.volume_pose = Affine3f().translate(Vec3f(-p.volume_size[0]/2, -p.volume_size[1]/2, -p.volume_size[2]/2 ));
255 
256  p.bilateral_sigma_depth = 0.04f; //meter
257  p.bilateral_sigma_spatial = 4.5; //pixels
258  p.bilateral_kernel_size = 7; //pixels
259 
260  p.icp_truncate_depth_dist = 0.f; //meters, disabled
261  p.icp_dist_thres = 0.1f; //meters
262  p.icp_angle_thres = deg2rad(30.f); //radians
263  p.icp_iter_num.assign(iters, iters + levels);
264 
265  p.tsdf_min_camera_movement = 0.f; //meters, disabled
266  p.tsdf_trunc_dist = 0.04f; //meters;
267  p.tsdf_max_weight = 64; //frames
268 
269  p.raycast_step_factor = 0.75f; //in voxel sizes
270  p.gradient_delta_factor = 0.5f; //in voxel sizes
271 
272  //p.light_pose = p.volume_pose.translation()/4; //meters
273  p.light_pose = Vec3f::all(0.f); //meters
274  p.cmd_options = NULL;
275  return p;
276 }
kfusion::cuda::setDevice
KF_EXPORTS void setDevice(int device)
kfusion::cuda::checkIfPreFermiGPU
KF_EXPORTS bool checkIfPreFermiGPU(int device)
kfusion::Vec3f
cv::Vec3f Vec3f
Definition: types.hpp:16
kfusion::SampledScopeTime::SampledScopeTime
SampledScopeTime(double &time_ms)
Definition: core.cpp:202
kfusion::cuda::getDeviceName
KF_EXPORTS std::string getDeviceName(int device)
p
SharedPointer p
Definition: ConvertShared.hpp:42
kfusion::Affine3f
cv::Affine3f Affine3f
Definition: types.hpp:18
NULL
#define NULL
Definition: mydefs.hpp:141
kfusion::ScopeTime::start
double start
Definition: types.hpp:106
kinfu.hpp
kfusion::SampledScopeTime::getTime
double getTime()
Definition: core.cpp:218
kfusion::ScopeTime::getTime
double getTime()
Definition: core.cpp:233
kfusion::cuda::printCudaDeviceInfo
KF_EXPORTS void printCudaDeviceInfo(int device)
kfusion::KinFuParams::default_params
static KinFuParams default_params()
Definition: core.cpp:238
kfusion::cuda::printShortCudaDeviceInfo
KF_EXPORTS void printShortCudaDeviceInfo(int device)
kfusion::KinFuParams
Definition: types.hpp:127
kfusion::SampledScopeTime::~SampledScopeTime
~SampledScopeTime()
Definition: core.cpp:206
safe_call.hpp
kfusion::ScopeTime::ScopeTime
ScopeTime(const char *name)
Definition: core.cpp:223
kfusion::ScopeTime::~ScopeTime
~ScopeTime()
Definition: core.cpp:227
cudaSafeCall
#define cudaSafeCall(expr)
Definition: safe_call.hpp:16
kfusion::cuda::error
KF_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func="")
Error handler. All GPU functions from this subsystem call the function to report an error....
Definition: device_memory.cpp:7
kfusion::deg2rad
float deg2rad(float alpha)
Definition: types.hpp:101
kfusion::cuda::getCudaEnabledDeviceCount
KF_EXPORTS int getCudaEnabledDeviceCount()
kfusion::Intr
Definition: types.hpp:20
kfusion::SampledScopeTime::start
double start
Definition: types.hpp:124


lvr2
Author(s): Thomas Wiemann, Sebastian Pütz, Alexander Mock, Lars Kiesow, Lukas Kalbertodt, Tristan Igelbrink, Johan M. von Behren, Dominik Feldschnieders, Alexander Löhr
autogenerated on Wed Mar 2 2022 00:37:23