kdtree_opencl.cpp
Go to the documentation of this file.
1 /*
2 
3 Copyright (c) 2010--2011, Stephane Magnenat, ASL, ETHZ, Switzerland
4 You can contact the author at <stephane at magnenat dot net>
5 
6 All rights reserved.
7 
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10  * Redistributions of source code must retain the above copyright
11  notice, this list of conditions and the following disclaimer.
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15  * Neither the name of the <organization> nor the
16  names of its contributors may be used to endorse or promote products
17  derived from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL ETH-ASL BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 */
31 
32 #ifdef HAVE_OPENCL
33 
34 #include "nabo_private.h"
35 #include "index_heap.h"
36 #include <iostream>
37 #include <sstream>
38 #include <fstream>
39 #include <stdexcept>
40 #include <limits>
41 #include <queue>
42 #include <algorithm>
43 // #include <map>
44 
45 
51 namespace cl
52 {
54  typedef std::vector<Device> Devices;
55 }
56 
57 namespace Nabo
58 {
60 
63  template<typename T, typename CloudType>
65  {
66  T maxVal(0);
67  size_t maxIdx(0);
68  for (int i = 0; i < v.size(); ++i)
69  {
70  if (v[i] > maxVal)
71  {
72  maxVal = v[i];
73  maxIdx = i;
74  }
75  }
76  return maxIdx;
77  }
78 
80 
81 
83  #define MAX_K 32
84 
85  using namespace std;
86 
88  template<typename T, typename CloudType>
89  struct EnableCLTypeSupport {};
90 
92  template<typename CloudType>
93  struct EnableCLTypeSupport<float, CloudType>
94  {
96  static string code(const cl::Device& device)
97  {
98  return "typedef float T;\n";
99  }
100  };
101 
103  template<typename CloudType>
104  struct EnableCLTypeSupport<double, CloudType>
105  {
107 
108  static string code(const cl::Device& device)
109  {
110  string s;
111  const string& exts(device.getInfo<CL_DEVICE_EXTENSIONS>());
112  //cerr << "extensions: " << exts << endl;
113  // first try generic 64-bits fp, otherwise try to fall back on vendor-specific extensions
114  if (exts.find("cl_khr_fp64") != string::npos)
115  s += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
116  else if (exts.find("cl_amd_fp64") != string::npos)
117  s += "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n";
118  else
119  throw runtime_error("The OpenCL platform does not support 64 bits double-precision floating-points scalars.");
120  s += "typedef double T;\n";
121  return s;
122  }
123  };
124 
126  struct SourceCacher
127  {
129  typedef std::vector<cl::Device> Devices;
131  typedef std::map<std::string, cl::Program> ProgramCache;
132 
133  cl::Context context;
134  Devices devices;
135  ProgramCache cachedPrograms;
136 
138  SourceCacher(const cl_device_type deviceType)
139  {
140  // looking for platforms, AMD drivers do not like the default for creating context
141  vector<cl::Platform> platforms;
142  cl::Platform::get(&platforms);
143  if (platforms.empty())
144  throw runtime_error("No OpenCL platform found");
145  //for(vector<cl::Platform>::iterator i = platforms.begin(); i != platforms.end(); ++i)
146  // cerr << "platform " << i - platforms.begin() << " is " << (*i).getInfo<CL_PLATFORM_VENDOR>() << endl;
147  cl::Platform platform = platforms[0];
148  const char *userDefinedPlatform(getenv("NABO_OPENCL_USE_PLATFORM"));
149  if (userDefinedPlatform)
150  {
151  size_t userDefinedPlatformId = atoi(userDefinedPlatform);
152  if (userDefinedPlatformId < platforms.size())
153  platform = platforms[userDefinedPlatformId];
154  }
155 
156  // create OpenCL contexts
157  cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform(), 0 };
158  bool deviceFound = false;
159  try {
160  context = cl::Context(deviceType, properties);
161  deviceFound = true;
162  } catch (const cl::Error& e) {
163  cerr << "Cannot find device type " << deviceType << " for OpenCL, falling back to any device" << endl;
164  }
165  if (!deviceFound)
166  context = cl::Context(CL_DEVICE_TYPE_ALL, properties);
167  devices = context.getInfo<CL_CONTEXT_DEVICES>();
168  if (devices.empty())
169  throw runtime_error("No devices on OpenCL platform");
170  }
171 
173  ~SourceCacher()
174  {
175  cerr << "Destroying source cacher containing " << cachedPrograms.size() << " cached programs" << endl;
176  }
177 
179  bool contains(const std::string& source)
180  {
181  return cachedPrograms.find(source) != cachedPrograms.end();
182  }
183  };
184 
186  class ContextManager
187  {
188  public:
190  typedef std::map<cl_device_type, SourceCacher*> Devices;
191 
193  ~ContextManager()
194  {
195  cerr << "Destroying CL context manager, used " << devices.size() << " contexts" << endl;
196  for (Devices::iterator it(devices.begin()); it != devices.end(); ++it)
197  delete it->second;
198  }
200  cl::Context& createContext(const cl_device_type deviceType)
201  {
202  std::lock_guard lock(mutex);
203  Devices::iterator it(devices.find(deviceType));
204  if (it == devices.end())
205  {
206  it = devices.insert(
207  pair<cl_device_type, SourceCacher*>(deviceType, new SourceCacher(deviceType))
208  ).first;
209  }
210  return it->second->context;
211  }
213  SourceCacher* getSourceCacher(const cl_device_type deviceType)
214  {
215  std::lock_guard lock(mutex);
216  Devices::iterator it(devices.find(deviceType));
217  if (it == devices.end())
218  throw runtime_error("Attempt to get source cacher before creating a context");
219  return it->second;
220  }
221 
222  protected:
223  Devices devices;
224  std::mutex mutex;
225  };
226 
228  static ContextManager contextManager;
229 
230  template<typename T, typename CloudType>
231  OpenCLSearch<T, CloudType>::OpenCLSearch(const CloudType& cloud, const Index dim, const unsigned creationOptionFlags, const cl_device_type deviceType):
232  NearestNeighbourSearch<T, CloudType>::NearestNeighbourSearch(cloud, dim, creationOptionFlags),
233  deviceType(deviceType),
234  context(contextManager.createContext(deviceType))
235  {
236  }
237 
238  template<typename T, typename CloudType>
239  void OpenCLSearch<T, CloudType>::initOpenCL(const char* clFileName, const char* kernelName, const std::string& additionalDefines)
240  {
241  const bool collectStatistics(creationOptionFlags & NearestNeighbourSearch<T, CloudType>::TOUCH_STATISTICS);
242 
243  SourceCacher* sourceCacher(contextManager.getSourceCacher(deviceType));
244  SourceCacher::Devices& devices(sourceCacher->devices);
245 
246  // build and load source files
247  cl::Program::Sources sources;
248  // build defines
249  ostringstream oss;
250  oss << EnableCLTypeSupport<T, CloudType>::code(devices.back());
251  oss << "#define EPSILON " << numeric_limits<T>::epsilon() << "\n";
252  oss << "#define DIM_COUNT " << dim << "\n";
253  //oss << "#define CLOUD_POINT_COUNT " << cloud.cols() << "\n";
254  oss << "#define POINT_STRIDE " << cloud.stride() << "\n";
255  oss << "#define MAX_K " << MAX_K << "\n";
256  if (collectStatistics)
257  oss << "#define TOUCH_STATISTICS\n";
258  oss << additionalDefines;
259  //cerr << "params:\n" << oss.str() << endl;
260 
261  const std::string& source(oss.str());
262  if (!sourceCacher->contains(source))
263  {
264  const size_t defLen(source.length());
265  char *defContent(new char[defLen+1]);
266  strcpy(defContent, source.c_str());
267  sources.push_back(std::make_pair(defContent, defLen));
268  string sourceFileName(OPENCL_SOURCE_DIR);
269  sourceFileName += clFileName;
270  // load files
271  const char* files[] = {
272  OPENCL_SOURCE_DIR "structure.cl",
273  OPENCL_SOURCE_DIR "heap.cl",
274  sourceFileName.c_str(),
275  NULL
276  };
277  for (const char** file = files; *file != NULL; ++file)
278  {
279  std::ifstream stream(*file);
280  if (!stream.good())
281  throw runtime_error((string("cannot open file: ") + *file));
282 
283  stream.seekg(0, std::ios_base::end);
284  size_t size(stream.tellg());
285  stream.seekg(0, std::ios_base::beg);
286 
287  char* content(new char[size + 1]);
288  std::copy(std::istreambuf_iterator<char>(stream),
289  std::istreambuf_iterator<char>(), content);
290  content[size] = '\0';
291 
292  sources.push_back(std::make_pair(content, size));
293  }
294  sourceCacher->cachedPrograms[source] = cl::Program(context, sources);
295  cl::Program& program = sourceCacher->cachedPrograms[source];
296 
297  // build
298  cl::Error error(CL_SUCCESS);
299  try {
300  program.build(devices);
301  } catch (cl::Error e) {
302  error = e;
303  }
304 
305  // dump
306  for (cl::Devices::const_iterator it = devices.begin(); it != devices.end(); ++it)
307  {
308  cerr << "device : " << it->getInfo<CL_DEVICE_NAME>() << "\n";
309  cerr << "compilation log:\n" << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*it) << endl;
310  }
311  // cleanup sources
312  for (cl::Program::Sources::iterator it = sources.begin(); it != sources.end(); ++it)
313  {
314  delete[] it->first;
315  }
316  sources.clear();
317 
318  // make sure to stop if compilation failed
319  if (error.err() != CL_SUCCESS)
320  throw error;
321  }
322  cl::Program& program = sourceCacher->cachedPrograms[source];
323 
324  // build kernel and command queue
325  knnKernel = cl::Kernel(program, kernelName);
326  queue = cl::CommandQueue(context, devices.back());
327 
328  // map cloud
329  if (!(cloud.Flags & Eigen::DirectAccessBit) || (cloud.Flags & Eigen::RowMajorBit))
330  throw runtime_error("wrong memory mapping in point cloud");
331  const size_t cloudCLSize(cloud.cols() * cloud.stride() * sizeof(T));
332  cloudCL = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, cloudCLSize, const_cast<T*>(&cloud.coeff(0,0)));
333  knnKernel.setArg(0, sizeof(cl_mem), &cloudCL);
334  }
335 
336  template<typename T, typename CloudType>
337  unsigned long OpenCLSearch<T, CloudType>::knn(const Matrix& query, IndexMatrix& indices, Matrix& dists2, const Index k, const T epsilon, const unsigned optionFlags, const T maxRadius) const
338  {
339  checkSizesKnn(query, indices, dists2, k, optionFlags);
340  const bool collectStatistics(creationOptionFlags & NearestNeighbourSearch<T, CloudType>::TOUCH_STATISTICS);
341 
342  // check K
343  if (k > MAX_K)
344  throw runtime_error("number of neighbors too large for OpenCL");
345 
346  // check consistency of query wrt cloud
347  if (query.stride() != cloud.stride() ||
348  query.rows() != cloud.rows())
349  throw runtime_error("query is not of the same dimensionality as the point cloud");
350 
351  // map query
352  if (!(query.Flags & Eigen::DirectAccessBit) || (query.Flags & Eigen::RowMajorBit))
353  throw runtime_error("wrong memory mapping in query data");
354  const size_t queryCLSize(query.cols() * query.stride() * sizeof(T));
355  cl::Buffer queryCL(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, queryCLSize, const_cast<T*>(&query.coeff(0,0)));
356  knnKernel.setArg(1, sizeof(cl_mem), &queryCL);
357  // map indices
358  assert((indices.Flags & Eigen::DirectAccessBit) && (!(indices.Flags & Eigen::RowMajorBit)));
359  const int indexStride(indices.stride());
360  const size_t indicesCLSize(indices.cols() * indexStride * sizeof(int));
361  cl::Buffer indicesCL(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, indicesCLSize, &indices.coeffRef(0,0));
362  knnKernel.setArg(2, sizeof(cl_mem), &indicesCL);
363  // map dists2
364  assert((dists2.Flags & Eigen::DirectAccessBit) && (!(dists2.Flags & Eigen::RowMajorBit)));
365  const int dists2Stride(dists2.stride());
366  const size_t dists2CLSize(dists2.cols() * dists2Stride * sizeof(T));
367  cl::Buffer dists2CL(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, dists2CLSize, &dists2.coeffRef(0,0));
368  knnKernel.setArg(3, sizeof(cl_mem), &dists2CL);
369 
370  // set resulting parameters
371  knnKernel.setArg(4, k);
372  knnKernel.setArg(5, (1 + epsilon)*(1 + epsilon));
373  knnKernel.setArg(6, maxRadius*maxRadius);
374  knnKernel.setArg(7, optionFlags);
375  knnKernel.setArg(8, indexStride);
376  knnKernel.setArg(9, dists2Stride);
377  knnKernel.setArg(10, cl_uint(cloud.cols()));
378 
379  // if required, map visit count
380  vector<cl_uint> visitCounts;
381  const size_t visitCountCLSize(query.cols() * sizeof(cl_uint));
382  cl::Buffer visitCountCL;
383  if (collectStatistics)
384  {
385  visitCounts.resize(query.cols());
386  visitCountCL = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, visitCountCLSize, &visitCounts[0]);
387  knnKernel.setArg(11, sizeof(cl_mem), &visitCountCL);
388  }
389 
390  // execute query
391  queue.enqueueNDRangeKernel(knnKernel, cl::NullRange, cl::NDRange(query.cols()), cl::NullRange);
392  queue.enqueueMapBuffer(indicesCL, true, CL_MAP_READ, 0, indicesCLSize, 0, 0);
393  queue.enqueueMapBuffer(dists2CL, true, CL_MAP_READ, 0, dists2CLSize, 0, 0);
394  if (collectStatistics)
395  queue.enqueueMapBuffer(visitCountCL, true, CL_MAP_READ, 0, visitCountCLSize, 0, 0);
396  queue.finish();
397 
398  // if required, collect statistics
399  if (collectStatistics)
400  {
401  unsigned long totalVisitCounts(0);
402  for (size_t i = 0; i < visitCounts.size(); ++i)
403  totalVisitCounts += (unsigned long)visitCounts[i];
404  return totalVisitCounts;
405  }
406  else
407  return 0;
408  }
409 
410  template<typename T, typename CloudType>
411  BruteForceSearchOpenCL<T, CloudType>::BruteForceSearchOpenCL(const CloudType& cloud, const Index dim, const unsigned creationOptionFlags, const cl_device_type deviceType):
412  OpenCLSearch<T, CloudType>::OpenCLSearch(cloud, dim, creationOptionFlags, deviceType)
413  {
414 #ifdef EIGEN3_API
415  const_cast<Vector&>(this->minBound) = cloud.topRows(this->dim).rowwise().minCoeff();
416  const_cast<Vector&>(this->maxBound) = cloud.topRows(this->dim).rowwise().maxCoeff();
417 #else // EIGEN3_API
418  // compute bounds
419  for (int i = 0; i < cloud.cols(); ++i)
420  {
421  const Vector& v(cloud.block(0,i,this->dim,1));
422  const_cast<Vector&>(this->minBound) = this->minBound.cwise().min(v);
423  const_cast<Vector&>(this->maxBound) = this->maxBound.cwise().max(v);
424  }
425 #endif // EIGEN3_API
426  // init openCL
427  initOpenCL("knn_bf.cl", "knnBruteForce");
428  }
429 
430  template struct BruteForceSearchOpenCL<float>;
431  template struct BruteForceSearchOpenCL<double>;
432  template struct BruteForceSearchOpenCL<float, Eigen::Matrix3Xf>;
433  template struct BruteForceSearchOpenCL<double, Eigen::Matrix3Xd>;
434  template struct BruteForceSearchOpenCL<float, Eigen::Map<const Eigen::Matrix3Xf, Eigen::Aligned> >;
435  template struct BruteForceSearchOpenCL<double, Eigen::Map<const Eigen::Matrix3Xd, Eigen::Aligned> >;
436 
437 
438 
439  template<typename T, typename CloudType>
440  size_t KDTreeBalancedPtInLeavesStackOpenCL<T, CloudType>::getTreeSize(size_t elCount) const
441  {
442  // FIXME: 64 bits safe stuff, only work for 2^32 elements right now
443  assert(elCount > 0);
444  elCount --;
445  size_t count = 0;
446  int i = 31;
447  for (; i >= 0; --i)
448  {
449  if (elCount & (1 << i))
450  break;
451  }
452  for (int j = 0; j <= i; ++j)
453  count |= (1 << j);
454  count <<= 1;
455  count |= 1;
456  return count;
457  }
458 
459  template<typename T, typename CloudType>
460  size_t KDTreeBalancedPtInLeavesStackOpenCL<T, CloudType>::getTreeDepth(size_t elCount) const
461  {
462  if (elCount <= 1)
463  return 0;
464  elCount --;
465  size_t i = 31;
466  for (; i >= 0; --i)
467  {
468  if (elCount & (1 << i))
469  break;
470  }
471  return i+1;
472  }
473 
474  template<typename T, typename CloudType>
475  void KDTreeBalancedPtInLeavesStackOpenCL<T, CloudType>::buildNodes(const BuildPointsIt first, const BuildPointsIt last, const size_t pos, const Vector minValues, const Vector maxValues)
476  {
477  const size_t count(last - first);
478  //cerr << count << endl;
479  if (count == 1)
480  {
481  const int d = -2-(first->index);
482  assert(pos < nodes.size());
483  nodes[pos] = Node(d);
484  return;
485  }
486 
487  // find the largest dimension of the box
488  size_t cutDim = argMax<T, CloudType>(maxValues - minValues);
489 
490  // compute number of elements
491  const size_t rightCount(count/2);
492  const size_t leftCount(count - rightCount);
493  assert(last - rightCount == first + leftCount);
494 
495  // sort
496  nth_element(first, first + leftCount, last, CompareDim(cutDim));
497 
498  // set node
499  const T cutVal((first+leftCount)->pos.coeff(cutDim));
500  nodes[pos] = Node(cutDim, cutVal);
501 
502  //cerr << pos << " cutting on " << cutDim << " at " << (first+leftCount)->pos[cutDim] << endl;
503 
504  // update bounds for left
505  Vector leftMaxValues(maxValues);
506  leftMaxValues[cutDim] = cutVal;
507  // update bounds for right
508  Vector rightMinValues(minValues);
509  rightMinValues[cutDim] = cutVal;
510 
511  // recurse
512  buildNodes(first, first + leftCount, childLeft(pos), minValues, leftMaxValues);
513  buildNodes(first + leftCount, last, childRight(pos), rightMinValues, maxValues);
514  }
515 
516  template<typename T, typename CloudType>
517  KDTreeBalancedPtInLeavesStackOpenCL<T, CloudType>::KDTreeBalancedPtInLeavesStackOpenCL(const CloudType& cloud, const Index dim, const unsigned creationOptionFlags, const cl_device_type deviceType):
518  OpenCLSearch<T, CloudType>::OpenCLSearch(cloud, dim, creationOptionFlags, deviceType)
519  {
521 
522  // build point vector and compute bounds
523  BuildPoints buildPoints;
524  buildPoints.reserve(cloud.cols());
525  for (int i = 0; i < cloud.cols(); ++i)
526  {
527  const Vector& v(cloud.block(0,i,this->dim,1));
528  buildPoints.push_back(BuildPoint(v, i));
529 #ifdef EIGEN3_API
530  const_cast<Vector&>(minBound) = minBound.array().min(v.array());
531  const_cast<Vector&>(maxBound) = maxBound.array().max(v.array());
532 #else // EIGEN3_API
533  const_cast<Vector&>(minBound) = minBound.cwise().min(v);
534  const_cast<Vector&>(maxBound) = maxBound.cwise().max(v);
535 #endif // EIGEN3_API
536  }
537 
538  // create nodes
539  nodes.resize(getTreeSize(cloud.cols()));
540  buildNodes(buildPoints.begin(), buildPoints.end(), 0, minBound, maxBound);
541  const unsigned maxStackDepth(getTreeDepth(nodes.size()) + 1);
542 
543  // init openCL
544  initOpenCL("knn_kdtree_pt_in_leaves.cl", "knnKDTree", "#define MAX_STACK_DEPTH " + std::to_string(maxStackDepth) + "\n");
545 
546  // map nodes, for info about alignment, see sect 6.1.5
547  const size_t nodesCLSize(nodes.size() * sizeof(Node));
548  nodesCL = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, nodesCLSize, &nodes[0]);
549  if (collectStatistics)
550  knnKernel.setArg(12, sizeof(cl_mem), &nodesCL);
551  else
552  knnKernel.setArg(11, sizeof(cl_mem), &nodesCL);
553  }
554 
555  template struct KDTreeBalancedPtInLeavesStackOpenCL<float>;
556  template struct KDTreeBalancedPtInLeavesStackOpenCL<double>;
557  template struct KDTreeBalancedPtInLeavesStackOpenCL<float, Eigen::Matrix3Xf>;
558  template struct KDTreeBalancedPtInLeavesStackOpenCL<double, Eigen::Matrix3Xd>;
559  template struct KDTreeBalancedPtInLeavesStackOpenCL<float, Eigen::Map<const Eigen::Matrix3Xf, Eigen::Aligned> >;
560  template struct KDTreeBalancedPtInLeavesStackOpenCL<double, Eigen::Map<const Eigen::Matrix3Xd, Eigen::Aligned> >;
561 
562 
563  template<typename T, typename CloudType>
564  size_t KDTreeBalancedPtInNodesStackOpenCL<T, CloudType>::getTreeSize(size_t elCount) const
565  {
566  // FIXME: 64 bits safe stuff, only work for 2^32 elements right now
567  size_t count = 0;
568  int i = 31;
569  for (; i >= 0; --i)
570  {
571  if (elCount & (1 << i))
572  break;
573  }
574  for (int j = 0; j <= i; ++j)
575  count |= (1 << j);
576  //cerr << "tree size " << count << " (" << elCount << " elements)\n";
577  return count;
578  }
579 
580  template<typename T, typename CloudType>
581  size_t KDTreeBalancedPtInNodesStackOpenCL<T, CloudType>::getTreeDepth(size_t elCount) const
582  {
583  // FIXME: 64 bits safe stuff, only work for 2^32 elements right now
584  int i = 31;
585  for (; i >= 0; --i)
586  {
587  if (elCount & (1 << i))
588  break;
589  }
590  return i + 1;
591  }
592 
593  template<typename T, typename CloudType>
594  void KDTreeBalancedPtInNodesStackOpenCL<T, CloudType>::buildNodes(const BuildPointsIt first, const BuildPointsIt last, const size_t pos, const Vector minValues, const Vector maxValues)
595  {
596  const size_t count(last - first);
597  //cerr << count << endl;
598  if (count == 1)
599  {
600  nodes[pos] = Node(-1, *first);
601  return;
602  }
603 
604  // find the largest dimension of the box
605  const size_t cutDim = argMax<T, CloudType>(maxValues - minValues);
606 
607  // compute number of elements
608  const size_t recurseCount(count-1);
609  const size_t rightCount(recurseCount/2);
610  const size_t leftCount(recurseCount-rightCount);
611  assert(last - rightCount == first + leftCount + 1);
612 
613  // sort
614  nth_element(first, first + leftCount, last, CompareDim(cloud, cutDim));
615 
616  // set node
617  const Index index(*(first+leftCount));
618  const T cutVal(cloud.coeff(cutDim, index));
619  nodes[pos] = Node(cutDim, index);
620 
621  //cerr << pos << " cutting on " << cutDim << " at " << (first+leftCount)->pos[cutDim] << endl;
622 
623  // update bounds for left
624  Vector leftMaxValues(maxValues);
625  leftMaxValues[cutDim] = cutVal;
626  // update bounds for right
627  Vector rightMinValues(minValues);
628  rightMinValues[cutDim] = cutVal;
629 
630  // recurse
631  if (count > 2)
632  {
633  buildNodes(first, first + leftCount, childLeft(pos), minValues, leftMaxValues);
634  buildNodes(first + leftCount + 1, last, childRight(pos), rightMinValues, maxValues);
635  }
636  else
637  {
638  nodes[childLeft(pos)] = Node(-1, *first);
639  nodes[childRight(pos)] = Node(-2, 0);
640  }
641  }
642 
643  template<typename T, typename CloudType>
644  KDTreeBalancedPtInNodesStackOpenCL<T, CloudType>::KDTreeBalancedPtInNodesStackOpenCL(const CloudType& cloud, const Index dim, const unsigned creationOptionFlags, const cl_device_type deviceType):
645  OpenCLSearch<T, CloudType>::OpenCLSearch(cloud, dim, creationOptionFlags, deviceType)
646  {
648 
649  // build point vector and compute bounds
650  BuildPoints buildPoints;
651  buildPoints.reserve(cloud.cols());
652  for (int i = 0; i < cloud.cols(); ++i)
653  {
654  buildPoints.push_back(i);
655  const Vector& v(cloud.block(0,i,this->dim,1));
656 #ifdef EIGEN3_API
657  const_cast<Vector&>(minBound) = minBound.array().min(v.array());
658  const_cast<Vector&>(maxBound) = maxBound.array().max(v.array());
659 #else // EIGEN3_API
660  const_cast<Vector&>(minBound) = minBound.cwise().min(v);
661  const_cast<Vector&>(maxBound) = maxBound.cwise().max(v);
662 #endif // EIGEN3_API
663  }
664 
665  // create nodes
666  nodes.resize(getTreeSize(cloud.cols()));
667  buildNodes(buildPoints.begin(), buildPoints.end(), 0, minBound, maxBound);
668  const unsigned maxStackDepth(getTreeDepth(nodes.size()) + 1);
669 
670  // init openCL
671  initOpenCL("knn_kdtree_pt_in_nodes.cl", "knnKDTree", "#define MAX_STACK_DEPTH " + std::to_string(maxStackDepth) + "\n");
672 
673  // map nodes, for info about alignment, see sect 6.1.5
674  const size_t nodesCLSize(nodes.size() * sizeof(Node));
675  nodesCL = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, nodesCLSize, &nodes[0]);
676  if (collectStatistics)
677  knnKernel.setArg(12, sizeof(cl_mem), &nodesCL);
678  else
679  knnKernel.setArg(11, sizeof(cl_mem), &nodesCL);
680  }
681 
682  template struct KDTreeBalancedPtInNodesStackOpenCL<float>;
683  template struct KDTreeBalancedPtInNodesStackOpenCL<double>;
684  template struct KDTreeBalancedPtInNodesStackOpenCL<float, Eigen::Matrix3Xf>;
685  template struct KDTreeBalancedPtInNodesStackOpenCL<double, Eigen::Matrix3Xd>;
686  template struct KDTreeBalancedPtInNodesStackOpenCL<float, Eigen::Map<const Eigen::Matrix3Xf, Eigen::Aligned> >;
687  template struct KDTreeBalancedPtInNodesStackOpenCL<double, Eigen::Map<const Eigen::Matrix3Xd, Eigen::Aligned> >;
688 
690 }
691 
692 #endif // HAVE_OPENCL
693 /* vim: set ts=8 sw=8 tw=0 noexpandtab cindent softtabstop=8 :*/
Nabo
Namespace for Nabo.
Definition: experimental/kdtree_cpu.cpp:40
index_heap.h
implementation of index heaps
nabo_private.h
header for implementation
Nabo::argMax
size_t argMax(const typename NearestNeighbourSearch< T, CloudType >::Vector &v)
Return the index of the maximum value of a vector of positive values.
Definition: experimental/kdtree_cpu.cpp:45
Nabo::NearestNeighbourSearch< T, Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >::maxBound
const Vector maxBound
the high bound of the search space (axis-aligned bounding box)
Definition: nabo.h:282
Index
NNSNabo::Index Index
Definition: python/nabo.cpp:11
Nabo::KDTreeUnbalancedPtInLeavesImplicitBoundsStackOpt::nodes
Nodes nodes
search nodes
Definition: nabo_private.h:174
Nabo::NearestNeighbourSearch< T, Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >::cloud
const CloudType & cloud
the reference to the data-point cloud, which must remain valid during the lifetime of the NearestNeig...
Definition: nabo.h:274
Nabo::KDTreeUnbalancedPtInLeavesImplicitBoundsStackOpt::Vector
NearestNeighbourSearch< T, CloudType >::Vector Vector
Definition: nabo_private.h:96
Nabo::NearestNeighbourSearch< T, Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >::creationOptionFlags
const unsigned creationOptionFlags
creation options
Definition: nabo.h:278
Nabo::KDTreeUnbalancedPtInLeavesImplicitBoundsStackOpt::BuildPoints
std::vector< Index > BuildPoints
indices of points during kd-tree construction
Definition: nabo_private.h:111
Nabo::NearestNeighbourSearch< T, Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > >::minBound
const Vector minBound
the low bound of the search space (axis-aligned bounding box)
Definition: nabo.h:280
std
Definition: any.hpp:450
Nabo::KDTreeUnbalancedPtInLeavesImplicitBoundsStackOpt::buildNodes
unsigned buildNodes(const BuildPointsIt first, const BuildPointsIt last, const Vector minValues, const Vector maxValues)
construct nodes for points [first..last[ inside the hyperrectangle [minValues..maxValues]
Definition: nabo/kdtree_cpu.cpp:110
NearestNeighbourSearch
Definition: python/nabo.cpp:95


libnabo
Author(s): Stéphane Magnenat
autogenerated on Fri Aug 2 2024 08:39:11