depth_registration_opencl.cpp
Go to the documentation of this file.
1 
18 #include <fstream>
19 
21 
22 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
23 #define CL_USE_DEPRECATED_OPENCL_2_0_APIS
24 
25 #ifdef KINECT2_OPENCL_ICD_LOADER_IS_OLD
26 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
27 #include <CL/cl.h>
28 #ifdef CL_VERSION_1_2
29 #undef CL_VERSION_1_2
30 #endif //CL_VERSION_1_2
31 #endif //KINECT2_OPENCL_ICD_LOADER_IS_OLD
32 
33 #include <CL/cl.hpp>
34 
35 #ifndef REG_OPENCL_FILE
36 #define REG_OPENCL_FILE ""
37 #endif
38 
40 
41 //#define ENABLE_PROFILING_CL
42 
43 #define CL_FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
44 #define PRINT_CL_ERROR(expr, err) OUT_ERROR(FG_BLUE "[" << CL_FILENAME << "]" FG_CYAN "(" << __LINE__ << ") " FG_YELLOW << expr << FG_RED " failed: " << err)
45 
46 #define CHECK_CL_PARAM(expr) do { cl_int err = CL_SUCCESS; (expr); if (err != CL_SUCCESS) { PRINT_CL_ERROR(#expr, err); return false; } } while(0)
47 #define CHECK_CL_RETURN(expr) do { cl_int err = (expr); if (err != CL_SUCCESS) { PRINT_CL_ERROR(#expr, err); return false; } } while(0)
48 #define CHECK_CL_ON_FAIL(expr, on_fail) do { cl_int err = (expr); if (err != CL_SUCCESS) { PRINT_CL_ERROR(#expr, err); on_fail; return false; } } while(0)
49 
51 {
54 
57 
62 
63  size_t sizeDepth;
65  size_t sizeIndex;
66  size_t sizeImgZ;
67  size_t sizeDists;
68  size_t sizeSelDist;
69  size_t sizeMap;
70 
80 
82  unsigned char *dataOutput;
83 
84 #ifdef ENABLE_PROFILING_CL
85  std::vector<double> timings;
86  int count;
87 #endif
88 };
89 
92 {
93  data = new OCLData;
94 }
95 
97 {
98  delete data;
99 }
100 
101 void getDevices(const std::vector<cl::Platform> &platforms, std::vector<cl::Device> &devices)
102 {
103  devices.clear();
104  for(size_t i = 0; i < platforms.size(); ++i)
105  {
106  const cl::Platform &platform = platforms[i];
107 
108  std::vector<cl::Device> devs;
109  if(platform.getDevices(CL_DEVICE_TYPE_ALL, &devs) != CL_SUCCESS)
110  {
111  continue;
112  }
113 
114  devices.insert(devices.end(), devs.begin(), devs.end());
115  }
116 }
117 
118 std::string deviceString(cl::Device &dev)
119 {
120  std::string devName, devVendor, devType;
121  cl_device_type devTypeID;
122  dev.getInfo(CL_DEVICE_NAME, &devName);
123  dev.getInfo(CL_DEVICE_VENDOR, &devVendor);
124  dev.getInfo(CL_DEVICE_TYPE, &devTypeID);
125 
126  switch(devTypeID)
127  {
128  case CL_DEVICE_TYPE_CPU:
129  devType = "CPU";
130  break;
131  case CL_DEVICE_TYPE_GPU:
132  devType = "GPU";
133  break;
134  case CL_DEVICE_TYPE_ACCELERATOR:
135  devType = "ACCELERATOR";
136  break;
137  default:
138  devType = "CUSTOM/UNKNOWN";
139  }
140 
141  return devName + " (" + devType + ")[" + devVendor + ']';
142 }
143 
144 bool selectDevice(std::vector<cl::Device> &devices, cl::Device &device, const int deviceId = -1)
145 {
146  if(deviceId != -1 && devices.size() > (size_t)deviceId)
147  {
148  device = devices[deviceId];
149  return true;
150  }
151 
152  bool selected = false;
153  cl_device_type selectedType = 0;
154 
155  for(size_t i = 0; i < devices.size(); ++i)
156  {
157  cl::Device &dev = devices[i];
158  cl_device_type devTypeID;
159  dev.getInfo(CL_DEVICE_TYPE, &devTypeID);
160 
161  if(!selected || (selectedType != CL_DEVICE_TYPE_GPU && devTypeID == CL_DEVICE_TYPE_GPU))
162  {
163  selectedType = devTypeID;
164  selected = true;
165  device = dev;
166  }
167  }
168  return selected;
169 }
170 
// Sets up the OpenCL state held in `data`: reads the kernel source (readProgram), lists the
// available devices, picks one (selectDevice), creates the context, builds the program with the
// options from generateOptions, creates the buffers and kernels and maps the output buffer.
//
// deviceId is forwarded unchanged to selectDevice().
// Returns true on success. Returns false as soon as a step fails; the CHECK_CL_* macros
// log the failing expression and return false from this function.
//
// NOTE(review): this view of the function has gaps (no statement is visible in the
// non-profiling #else branch, nor after each kernel creation) - confirm against the full source.
171 bool DepthRegistrationOpenCL::init(const int deviceId)
172 {
173  std::string sourceCode;
174  if(!readProgram(sourceCode))
175  {
176  return false;
177  }
178 
179  std::vector<cl::Platform> platforms;
180  CHECK_CL_RETURN(cl::Platform::get(&platforms));
181 
182  if(platforms.empty())
183  {
184  OUT_ERROR("no opencl platforms found.");
185  return false;
186  }
187 
188  std::vector<cl::Device> devices;
189  getDevices(platforms, devices);
190 
  // Log every device together with its index in `devices`.
191  OUT_INFO("devices:");
192  for(size_t i = 0; i < devices.size(); ++i)
193  {
194  OUT_INFO(" " << i << ": " FG_CYAN << deviceString(devices[i]) << NO_COLOR);
195  }
196 
197  if(!selectDevice(devices, data->device, deviceId))
198  {
199  OUT_ERROR("could not find any suitable device");
200  return false;
201  }
202  OUT_INFO("selected device: " FG_YELLOW << deviceString(data->device) << NO_COLOR);
203 
204  CHECK_CL_PARAM(data->context = cl::Context(data->device, NULL, NULL, NULL, &err));
205 
206  std::string options;
207  generateOptions(options);
208 
  // `source` only stores a pointer into sourceCode, which stays alive until the end of this function.
209  cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));
210  CHECK_CL_PARAM(data->program = cl::Program(data->context, source, &err));
211 
  // On a build failure the build status, options and log are logged before returning false.
212  CHECK_CL_ON_FAIL(data->program.build(options.c_str()),
213  OUT_ERROR("failed to build program: " << err);
214  OUT_ERROR("Build Status: " << data->program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(data->device));
215  OUT_ERROR("Build Options:\t" << data->program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(data->device));
216  OUT_ERROR("Build Log:\t " << data->program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(data->device)));
217 
  // NOTE(review): data->queue is used further down, but only the profiling branch is visible
  // here - presumably the #else branch creates the queue without profiling; confirm.
218 #ifdef ENABLE_PROFILING_CL
219  data->count = 0;
220  CHECK_CL_PARAM(data->queue = cl::CommandQueue(data->context, data->device, CL_QUEUE_PROFILING_ENABLE, &err));
221 #else
223 #endif
224 
  // Buffer sizes in bytes: number of pixels times the size of one element.
  // Only sizeDepth uses the depth image size, all others use the registered image size.
225  data->sizeDepth = sizeDepth.height * sizeDepth.width * sizeof(uint16_t);
226  data->sizeRegistered = sizeRegistered.height * sizeRegistered.width * sizeof(uint16_t);
227  data->sizeIndex = sizeRegistered.height * sizeRegistered.width * sizeof(cl_int4);
228  data->sizeImgZ = sizeRegistered.height * sizeRegistered.width * sizeof(uint16_t);
229  data->sizeDists = sizeRegistered.height * sizeRegistered.width * sizeof(cl_float4);
230  data->sizeSelDist = sizeRegistered.height * sizeRegistered.width * sizeof(float);
231  data->sizeMap = sizeRegistered.height * sizeRegistered.width * sizeof(float);
232 
233  CHECK_CL_PARAM(data->bufferDepth = cl::Buffer(data->context, CL_MEM_READ_ONLY, data->sizeDepth, NULL, &err));
234  CHECK_CL_PARAM(data->bufferScaled = cl::Buffer(data->context, CL_MEM_READ_WRITE, data->sizeRegistered, NULL, &err));
235  CHECK_CL_PARAM(data->bufferRegistered = cl::Buffer(data->context, CL_MEM_READ_WRITE, data->sizeRegistered, NULL, &err));
236  CHECK_CL_PARAM(data->bufferIndex = cl::Buffer(data->context, CL_MEM_READ_WRITE, data->sizeIndex, NULL, &err));
237  CHECK_CL_PARAM(data->bufferImgZ = cl::Buffer(data->context, CL_MEM_READ_WRITE, data->sizeImgZ, NULL, &err));
238  CHECK_CL_PARAM(data->bufferDists = cl::Buffer(data->context, CL_MEM_READ_WRITE, data->sizeDists, NULL, &err));
239  CHECK_CL_PARAM(data->bufferSelDist = cl::Buffer(data->context, CL_MEM_READ_WRITE, data->sizeSelDist, NULL, &err));
240  CHECK_CL_PARAM(data->bufferMapX = cl::Buffer(data->context, CL_MEM_READ_ONLY, data->sizeMap, NULL, &err));
241  CHECK_CL_PARAM(data->bufferMapY = cl::Buffer(data->context, CL_MEM_READ_ONLY, data->sizeMap, NULL, &err));
242  CHECK_CL_PARAM(data->bufferOutput = cl::Buffer(data->context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, data->sizeRegistered, NULL, &err));
243 
  // Kernels are looked up by name in the program built above.
244  CHECK_CL_PARAM(data->kernelSetZero = cl::Kernel(data->program, "setZero", &err));
247 
248  CHECK_CL_PARAM(data->kernelProject = cl::Kernel(data->program, "project", &err));
255 
256  CHECK_CL_PARAM(data->kernelCheckDepth = cl::Kernel(data->program, "checkDepth", &err));
262 
263  CHECK_CL_PARAM(data->kernelRemap = cl::Kernel(data->program, "remapDepth", &err));
268 
271 
  // Blocking (CL_TRUE) map of bufferOutput for reading; the resulting host pointer is kept in
  // data->dataOutput. No matching unmap is visible in this function.
272  CHECK_CL_PARAM(data->dataOutput = (unsigned char *)data->queue.enqueueMapBuffer(data->bufferOutput, CL_TRUE, CL_MAP_READ, 0, data->sizeRegistered, NULL, NULL, &err));
273  return true;
274 }
275 
// Runs the OpenCL pipeline on one depth image and returns the result in `registered`.
//
// depth:      input image; data->sizeDepth bytes are uploaded from depth.data.
// registered: assigned a CV_16U cv::Mat of size sizeRegistered constructed on data->dataOutput.
//             NOTE(review): presumably this wraps the buffer without copying, so the content
//             would be overwritten by the next call - confirm and clone at the caller if needed.
// Returns true on success, false as soon as an OpenCL call fails (logged by CHECK_CL_RETURN).
//
// NOTE(review): this view of the function has gaps after the upload; the commands that should
// signal eventZero[1] and eventRemap[0] are not visible here - confirm against the full source.
276 bool DepthRegistrationOpenCL::registerDepth(const cv::Mat &depth, cv::Mat &registered)
277 {
278  cl::Event eventRead;
279  std::vector<cl::Event> eventZero(2), eventRemap(1), eventProject(1), eventCheckDepth1(1), eventCheckDepth2(1);
  // One work item per pixel of the registered image.
280  cl::NDRange range(sizeRegistered.height * sizeRegistered.width);
281 
  // Non-blocking upload of the depth image; completion is signalled through eventZero[0].
282  CHECK_CL_RETURN(data->queue.enqueueWriteBuffer(data->bufferDepth, CL_FALSE, 0, data->sizeDepth, depth.data, NULL, &eventZero[0]));
284 
286 
  // The following commands are chained through events: each one waits for the event list
  // signalled by the previous one.
287  CHECK_CL_RETURN(data->queue.enqueueNDRangeKernel(data->kernelProject, cl::NullRange, range, cl::NullRange, &eventRemap, &eventProject[0]));
288 
  // The checkDepth kernel is enqueued twice in a row.
289  CHECK_CL_RETURN(data->queue.enqueueNDRangeKernel(data->kernelCheckDepth, cl::NullRange, range, cl::NullRange, &eventProject, &eventCheckDepth1[0]));
290 
291  CHECK_CL_RETURN(data->queue.enqueueNDRangeKernel(data->kernelCheckDepth, cl::NullRange, range, cl::NullRange, &eventCheckDepth1, &eventCheckDepth2[0]));
292 
  // Non-blocking read of bufferRegistered into data->dataOutput, then wait for it to finish.
293  CHECK_CL_RETURN(data->queue.enqueueReadBuffer(data->bufferRegistered, CL_FALSE, 0, data->sizeRegistered, data->dataOutput, &eventCheckDepth2, &eventRead));
294 
295  CHECK_CL_RETURN(eventRead.wait());
296 
297  registered = cv::Mat(sizeRegistered, CV_16U, data->dataOutput);
298 
  // Optional profiling: accumulates (end - start) of every event over 100 calls, then logs
  // the accumulated values and restarts.
299 #ifdef ENABLE_PROFILING_CL
300  if(data->count == 0)
301  {
302  data->timings.clear();
303  data->timings.resize(7, 0.0);
304  }
305 
306  data->timings[0] += eventZero[0].getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventZero[0].getProfilingInfo<CL_PROFILING_COMMAND_START>();
307  data->timings[1] += eventZero[1].getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventZero[1].getProfilingInfo<CL_PROFILING_COMMAND_START>();
308  data->timings[2] += eventRemap[0].getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventRemap[0].getProfilingInfo<CL_PROFILING_COMMAND_START>();
309  data->timings[3] += eventProject[0].getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventProject[0].getProfilingInfo<CL_PROFILING_COMMAND_START>();
310  data->timings[4] += eventCheckDepth1[0].getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventCheckDepth1[0].getProfilingInfo<CL_PROFILING_COMMAND_START>();
311  data->timings[5] += eventCheckDepth2[0].getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventCheckDepth2[0].getProfilingInfo<CL_PROFILING_COMMAND_START>();
312  data->timings[6] += eventRead.getProfilingInfo<CL_PROFILING_COMMAND_END>() - eventRead.getProfilingInfo<CL_PROFILING_COMMAND_START>();
313 
314  if(++data->count == 100)
315  {
  // NOTE(review): the divisor 100000000.0 presumably combines nanoseconds -> milliseconds (1e6)
  // with the average over the 100 accumulated calls (1e2) - confirm the unit of the counters.
316  double sum = data->timings[0] + data->timings[1] + data->timings[2] + data->timings[3] + data->timings[4] + data->timings[5] + data->timings[6];
317  OUT_INFO("writing depth: " << data->timings[0] / 100000000.0 << " ms.");
318  OUT_INFO("setting zero: " << data->timings[1] / 100000000.0 << " ms.");
319  OUT_INFO("remap: " << data->timings[2] / 100000000.0 << " ms.");
320  OUT_INFO("project: " << data->timings[3] / 100000000.0 << " ms.");
321  OUT_INFO("check depth 1: " << data->timings[4] / 100000000.0 << " ms.");
322  OUT_INFO("check depth 2: " << data->timings[5] / 100000000.0 << " ms.");
323  OUT_INFO("read registered: " << data->timings[6] / 100000000.0 << " ms.");
324  OUT_INFO("overall: " << sum / 100000000.0 << " ms.");
325  data->count = 0;
326  }
327 #endif
328  return true;
329 }
330 
331 void DepthRegistrationOpenCL::generateOptions(std::string &options) const
332 {
333  std::ostringstream oss;
334  oss.precision(16);
335  oss << std::scientific;
336 
337  // Rotation
338  oss << " -D r00=" << rotation.at<double>(0, 0) << "f";
339  oss << " -D r01=" << rotation.at<double>(0, 1) << "f";
340  oss << " -D r02=" << rotation.at<double>(0, 2) << "f";
341  oss << " -D r10=" << rotation.at<double>(1, 0) << "f";
342  oss << " -D r11=" << rotation.at<double>(1, 1) << "f";
343  oss << " -D r12=" << rotation.at<double>(1, 2) << "f";
344  oss << " -D r20=" << rotation.at<double>(2, 0) << "f";
345  oss << " -D r21=" << rotation.at<double>(2, 1) << "f";
346  oss << " -D r22=" << rotation.at<double>(2, 2) << "f";
347 
348  // Translation
349  oss << " -D tx=" << translation.at<double>(0, 0) << "f";
350  oss << " -D ty=" << translation.at<double>(1, 0) << "f";
351  oss << " -D tz=" << translation.at<double>(2, 0) << "f";
352 
353  // Camera parameter upscaled depth
354  oss << " -D fxR=" << cameraMatrixRegistered.at<double>(0, 0) << "f";
355  oss << " -D fyR=" << cameraMatrixRegistered.at<double>(1, 1) << "f";
356  oss << " -D cxR=" << cameraMatrixRegistered.at<double>(0, 2) << "f";
357  oss << " -D cyR=" << cameraMatrixRegistered.at<double>(1, 2) << "f";
358  oss << " -D fxRInv=" << (1.0 / cameraMatrixRegistered.at<double>(0, 0)) << "f";
359  oss << " -D fyRInv=" << (1.0 / cameraMatrixRegistered.at<double>(1, 1)) << "f";
360 
361  // Clipping distances
362  oss << " -D zNear=" << (uint16_t)(zNear * 1000);
363  oss << " -D zFar=" << (uint16_t)(zFar * 1000);
364 
365  // Size registered image
366  oss << " -D heightR=" << sizeRegistered.height;
367  oss << " -D widthR=" << sizeRegistered.width;
368 
369  // Size depth image
370  oss << " -D heightD=" << sizeDepth.height;
371  oss << " -D widthD=" << sizeDepth.width;
372 
373  options = oss.str();
374 }
375 
376 bool DepthRegistrationOpenCL::readProgram(std::string &source) const
377 {
378  std::ifstream file(REG_OPENCL_FILE);
379 
380  if(!file.is_open())
381  {
382  return false;
383  }
384 
385  source = std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
386  file.close();
387 
388  return true;
389 }
Class interface for cl_event.
Definition: cl.hpp:2837
cl_int getInfo(cl_device_info name, T *param) const
Wrapper for clGetDeviceInfo().
Definition: cl.hpp:2079
#define FG_YELLOW
cl_int enqueueNDRangeKernel(const Kernel &kernel, const NDRange &offset, const NDRange &global, const NDRange &local=NullRange, const VECTOR_CLASS< Event > *events=NULL, Event *event=NULL) const
Definition: cl.hpp:6324
Class interface for specifying NDRange values.
Definition: cl.hpp:4735
cl_int enqueueReadBuffer(const Buffer &buffer, cl_bool blocking,::size_t offset,::size_t size, void *ptr, const VECTOR_CLASS< Event > *events=NULL, Event *event=NULL) const
Definition: cl.hpp:5685
cl_int getBuildInfo(const Device &device, cl_program_build_info name, T *param) const
Definition: cl.hpp:5318
#define OUT_ERROR(msg)
#define CHECK_CL_PARAM(expr)
#define OUT_INFO(msg)
Class interface for Buffer Memory Objects.
Definition: cl.hpp:3140
static cl_int get(VECTOR_CLASS< Platform > *platforms)
Gets a list of available platforms.
Definition: cl.hpp:2324
CommandQueue interface for cl_command_queue.
Definition: cl.hpp:5477
cl_int wait() const
Blocks the calling thread until this event completes.
Definition: cl.hpp:2911
C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and OpenCL 1.2 (rev 15) ...
bool selectDevice(std::vector< cl::Device > &devices, cl::Device &device, const int deviceId=-1)
cl_int setArg(cl_uint index, const T &value)
Definition: cl.hpp:4968
void * enqueueMapBuffer(const Buffer &buffer, cl_bool blocking, cl_map_flags flags,::size_t offset,::size_t size, const VECTOR_CLASS< Event > *events=NULL, Event *event=NULL, cl_int *err=NULL) const
Definition: cl.hpp:6144
cl_int getDevices(cl_device_type type, VECTOR_CLASS< Device > *devices) const
Gets a list of devices for this platform.
Definition: cl.hpp:2220
cl_int build(const VECTOR_CLASS< Device > &devices, const char *options=NULL, void(CL_CALLBACK *notifyFptr)(cl_program, void *)=NULL, void *data=NULL) const
Definition: cl.hpp:5235
Class interface for cl_kernel.
Definition: cl.hpp:4844
Program interface that implements cl_program.
Definition: cl.hpp:4990
void getDevices(const std::vector< cl::Platform > &platforms, std::vector< cl::Device > &devices)
VECTOR_CLASS< std::pair< const char *,::size_t > > Sources
Definition: cl.hpp:4994
#define REG_OPENCL_FILE
static const NDRange NullRange
A zero-dimensional range.
Definition: cl.hpp:4784
cl_int enqueueWriteBuffer(const Buffer &buffer, cl_bool blocking,::size_t offset,::size_t size, const void *ptr, const VECTOR_CLASS< Event > *events=NULL, Event *event=NULL) const
Definition: cl.hpp:5710
#define CHECK_CL_RETURN(expr)
#define FG_CYAN
void generateOptions(std::string &options) const
#define NO_COLOR
Class interface for cl_device_id.
Definition: cl.hpp:2019
#define CHECK_CL_ON_FAIL(expr, on_fail)
Class interface for cl_platform_id.
Definition: cl.hpp:2172
bool readProgram(std::string &source) const
Class interface for cl_context.
Definition: cl.hpp:2454
cl_int getProfilingInfo(cl_profiling_info name, T *param) const
Wrapper for clGetEventProfilingInfo().
Definition: cl.hpp:2886
bool registerDepth(const cv::Mat &depth, cv::Mat &registered)
std::string deviceString(cl::Device &dev)


kinect2_registration
Author(s):
autogenerated on Wed Jan 3 2018 03:48:04