hierarchical_clustering_index.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2011 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2011 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef RTABMAP_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
32 #define RTABMAP_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
33 
34 #include <algorithm>
35 #include <string>
36 #include <map>
37 #include <cassert>
38 #include <limits>
39 #include <cmath>
40 
41 #ifndef SIZE_MAX
42 #define SIZE_MAX ((size_t) -1)
43 #endif
44 
45 #include "rtflann/general.h"
48 #include "rtflann/util/matrix.h"
50 #include "rtflann/util/heap.h"
51 #include "rtflann/util/allocator.h"
52 #include "rtflann/util/random.h"
53 #include "rtflann/util/saving.h"
55 
56 namespace rtflann
57 {
58 
60 {
63  int trees = 4, int leaf_max_size = 100)
64  {
65  (*this)["algorithm"] = FLANN_INDEX_HIERARCHICAL;
66  // The branching factor used in the hierarchical clustering
67  (*this)["branching"] = branching;
68  // Algorithm used for picking the initial cluster centers
69  (*this)["centers_init"] = centers_init;
70  // number of parallel trees to build
71  (*this)["trees"] = trees;
72  // maximum leaf size
73  (*this)["leaf_max_size"] = leaf_max_size;
74  }
75 };
76 
77 
78 
85 template <typename Distance>
86 class HierarchicalClusteringIndex : public NNIndex<Distance>
87 {
88 public:
89  typedef typename Distance::ElementType ElementType;
90  typedef typename Distance::ResultType DistanceType;
91 
93 
100  HierarchicalClusteringIndex(const IndexParams& index_params = HierarchicalClusteringIndexParams(), Distance d = Distance())
101  : BaseClass(index_params, d)
102  {
103  memoryCounter_ = 0;
104 
105  branching_ = get_param(index_params_,"branching",32);
106  centers_init_ = get_param(index_params_,"centers_init", FLANN_CENTERS_RANDOM);
107  trees_ = get_param(index_params_,"trees",4);
108  leaf_max_size_ = get_param(index_params_,"leaf_max_size",100);
109 
110  initCenterChooser();
111  }
112 
113 
122  Distance d = Distance())
123  : BaseClass(index_params, d)
124  {
125  memoryCounter_ = 0;
126 
127  branching_ = get_param(index_params_,"branching",32);
128  centers_init_ = get_param(index_params_,"centers_init", FLANN_CENTERS_RANDOM);
129  trees_ = get_param(index_params_,"trees",4);
130  leaf_max_size_ = get_param(index_params_,"leaf_max_size",100);
131 
132  initCenterChooser();
133 
134  setDataset(inputData);
135 
136  chooseCenters_->setDataSize(veclen_);
137  }
138 
139 
141  memoryCounter_(other.memoryCounter_),
142  branching_(other.branching_),
143  trees_(other.trees_),
144  centers_init_(other.centers_init_),
145  leaf_max_size_(other.leaf_max_size_)
146 
147  {
148  initCenterChooser();
149  tree_roots_.resize(other.tree_roots_.size());
150  for (size_t i=0;i<tree_roots_.size();++i) {
151  copyTree(tree_roots_[i], other.tree_roots_[i]);
152  }
153  }
154 
156  {
157  this->swap(other);
158  return *this;
159  }
160 
161 
163  {
164  switch(centers_init_) {
166  chooseCenters_ = new RandomCenterChooser<Distance>(distance_, points_);
167  break;
169  chooseCenters_ = new GonzalesCenterChooser<Distance>(distance_, points_);
170  break;
172  chooseCenters_ = new KMeansppCenterChooser<Distance>(distance_, points_);
173  break;
175  chooseCenters_ = new GroupWiseCenterChooser<Distance>(distance_, points_);
176  break;
177  default:
178  throw FLANNException("Unknown algorithm for choosing initial centers.");
179  }
180  }
181 
188  {
189  delete chooseCenters_;
190  freeIndex();
191  }
192 
193  BaseClass* clone() const
194  {
195  return new HierarchicalClusteringIndex(*this);
196  }
197 
202  int usedMemory() const
203  {
204  return pool_.usedMemory+pool_.wastedMemory+memoryCounter_;
205  }
206 
207  using BaseClass::buildIndex;
208 
209  void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2)
210  {
211  assert(points.cols==veclen_);
212  size_t old_size = size_;
213 
214  extendDataset(points);
215 
216  if (rebuild_threshold>1 && size_at_build_*rebuild_threshold<size_) {
217  buildIndex();
218  }
219  else {
220  for (size_t i=0;i<points.rows;++i) {
221  for (int j = 0; j < trees_; j++) {
222  addPointToTree(tree_roots_[j], old_size + i);
223  }
224  }
225  }
226  }
227 
228 
230  {
232  }
233 
234 
235  template<typename Archive>
236  void serialize(Archive& ar)
237  {
238  ar.setObject(this);
239 
240  ar & *static_cast<NNIndex<Distance>*>(this);
241 
242  ar & branching_;
243  ar & trees_;
244  ar & centers_init_;
245  ar & leaf_max_size_;
246 
247  if (Archive::is_loading::value) {
248  tree_roots_.resize(trees_);
249  }
250  for (size_t i=0;i<tree_roots_.size();++i) {
251  if (Archive::is_loading::value) {
252  tree_roots_[i] = new(pool_) Node();
253  }
254  ar & *tree_roots_[i];
255  }
256 
257  if (Archive::is_loading::value) {
258  index_params_["algorithm"] = getType();
259  index_params_["branching"] = branching_;
260  index_params_["trees"] = trees_;
261  index_params_["centers_init"] = centers_init_;
262  index_params_["leaf_size"] = leaf_max_size_;
263  }
264  }
265 
266  void saveIndex(FILE* stream)
267  {
268  serialization::SaveArchive sa(stream);
269  sa & *this;
270  }
271 
272 
273  void loadIndex(FILE* stream)
274  {
275  serialization::LoadArchive la(stream);
276  la & *this;
277  }
278 
279 
290  void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const
291  {
292  if (removed_) {
293  findNeighborsWithRemoved<true>(result, vec, searchParams);
294  }
295  else {
296  findNeighborsWithRemoved<false>(result, vec, searchParams);
297  }
298  }
299 
300 protected:
301 
306  {
307  chooseCenters_->setDataSize(veclen_);
308 
309  if (branching_<2) {
310  throw FLANNException("Branching factor must be at least 2");
311  }
312  tree_roots_.resize(trees_);
313  std::vector<int> indices(size_);
314  for (int i=0; i<trees_; ++i) {
315  for (size_t j=0; j<size_; ++j) {
316  indices[j] = j;
317  }
318  tree_roots_[i] = new(pool_) Node();
319  computeClustering(tree_roots_[i], &indices[0], size_);
320  }
321  }
322 
323 private:
324 
325  struct PointInfo
326  {
328  size_t index;
330  ElementType* point;
331 
332  private:
333  template<typename Archive>
334  void serialize(Archive& ar)
335  {
337  Index* obj = static_cast<Index*>(ar.getObject());
338 
339  ar & index;
340 // ar & point;
341 
342  if (Archive::is_loading::value) {
343  point = obj->points_[index];
344  }
345  }
346  friend struct serialization::access;
347  };
348 
352  struct Node
353  {
357  ElementType* pivot;
358  size_t pivot_index;
362  std::vector<Node*> childs;
366  std::vector<PointInfo> points;
367 
368  Node(){
369  pivot = NULL;
370  pivot_index = SIZE_MAX;
371  }
377  {
378  for(size_t i=0; i<childs.size(); i++){
379  childs[i]->~Node();
380  pivot = NULL;
381  pivot_index = -1;
382  }
383  };
384 
385  private:
386  template<typename Archive>
387  void serialize(Archive& ar)
388  {
390  Index* obj = static_cast<Index*>(ar.getObject());
391  ar & pivot_index;
392  if (Archive::is_loading::value) {
393  if (pivot_index != SIZE_MAX)
394  pivot = obj->points_[pivot_index];
395  else
396  pivot = NULL;
397  }
398  size_t childs_size;
399  if (Archive::is_saving::value) {
400  childs_size = childs.size();
401  }
402  ar & childs_size;
403 
404  if (childs_size==0) {
405  ar & points;
406  }
407  else {
408  if (Archive::is_loading::value) {
409  childs.resize(childs_size);
410  }
411  for (size_t i=0;i<childs_size;++i) {
412  if (Archive::is_loading::value) {
413  childs[i] = new(obj->pool_) Node();
414  }
415  ar & *childs[i];
416  }
417  }
418 
419  }
420  friend struct serialization::access;
421  };
422  typedef Node* NodePtr;
423 
424 
425 
430 
431 
436  void freeIndex(){
437  for (size_t i=0; i<tree_roots_.size(); ++i) {
438  tree_roots_[i]->~Node();
439  }
440  pool_.free();
441  }
442 
443  void copyTree(NodePtr& dst, const NodePtr& src)
444  {
445  dst = new(pool_) Node();
446  dst->pivot_index = src->pivot_index;
447  dst->pivot = points_[dst->pivot_index];
448 
449  if (src->childs.size()==0) {
450  dst->points = src->points;
451  }
452  else {
453  dst->childs.resize(src->childs.size());
454  for (size_t i=0;i<src->childs.size();++i) {
455  copyTree(dst->childs[i], src->childs[i]);
456  }
457  }
458  }
459 
460 
461 
462  void computeLabels(int* indices, int indices_length, int* centers, int centers_length, int* labels, DistanceType& cost)
463  {
464  cost = 0;
465  for (int i=0; i<indices_length; ++i) {
466  ElementType* point = points_[indices[i]];
467  DistanceType dist = distance_(point, points_[centers[0]], veclen_);
468  labels[i] = 0;
469  for (int j=1; j<centers_length; ++j) {
470  DistanceType new_dist = distance_(point, points_[centers[j]], veclen_);
471  if (dist>new_dist) {
472  labels[i] = j;
473  dist = new_dist;
474  }
475  }
476  cost += dist;
477  }
478  }
479 
490  void computeClustering(NodePtr node, int* indices, int indices_length)
491  {
492  if (indices_length < leaf_max_size_) { // leaf node
493  node->points.resize(indices_length);
494  for (int i=0;i<indices_length;++i) {
495  node->points[i].index = indices[i];
496  node->points[i].point = points_[indices[i]];
497  }
498  node->childs.clear();
499  return;
500  }
501 
502  std::vector<int> centers(branching_);
503  std::vector<int> labels(indices_length);
504 
505  int centers_length;
506  (*chooseCenters_)(branching_, indices, indices_length, &centers[0], centers_length);
507 
508  if (centers_length<branching_) {
509  node->points.resize(indices_length);
510  for (int i=0;i<indices_length;++i) {
511  node->points[i].index = indices[i];
512  node->points[i].point = points_[indices[i]];
513  }
514  node->childs.clear();
515  return;
516  }
517 
518 
519  // assign points to clusters
520  DistanceType cost;
521  computeLabels(indices, indices_length, &centers[0], centers_length, &labels[0], cost);
522 
523  node->childs.resize(branching_);
524  int start = 0;
525  int end = start;
526  for (int i=0; i<branching_; ++i) {
527  for (int j=0; j<indices_length; ++j) {
528  if (labels[j]==i) {
529  std::swap(indices[j],indices[end]);
530  std::swap(labels[j],labels[end]);
531  end++;
532  }
533  }
534 
535  node->childs[i] = new(pool_) Node();
536  node->childs[i]->pivot_index = centers[i];
537  node->childs[i]->pivot = points_[centers[i]];
538  node->childs[i]->points.clear();
539  computeClustering(node->childs[i],indices+start, end-start);
540  start=end;
541  }
542  }
543 
544 
545  template<bool with_removed>
546  void findNeighborsWithRemoved(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const
547  {
548  int maxChecks = searchParams.checks;
549 
550  // Priority queue storing intermediate branches in the best-bin-first search
551  Heap<BranchSt>* heap = new Heap<BranchSt>(size_);
552 
553  DynamicBitset checked(size_);
554  int checks = 0;
555  for (int i=0; i<trees_; ++i) {
556  findNN<with_removed>(tree_roots_[i], result, vec, checks, maxChecks, heap, checked);
557  }
558 
559  BranchSt branch;
560  while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
561  NodePtr node = branch.node;
562  findNN<with_removed>(node, result, vec, checks, maxChecks, heap, checked);
563  }
564 
565  delete heap;
566  }
567 
568 
581  template<bool with_removed>
582  void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
583  Heap<BranchSt>* heap, DynamicBitset& checked) const
584  {
585  if (node->childs.empty()) {
586  if (checks>=maxChecks) {
587  if (result.full()) return;
588  }
589 
590  for (size_t i=0; i<node->points.size(); ++i) {
591  PointInfo& pointInfo = node->points[i];
592  if (with_removed) {
593  if (removed_points_.test(pointInfo.index)) continue;
594  }
595  if (checked.test(pointInfo.index)) continue;
596  DistanceType dist = distance_(pointInfo.point, vec, veclen_);
597  result.addPoint(dist, pointInfo.index);
598  checked.set(pointInfo.index);
599  ++checks;
600  }
601  }
602  else {
603  DistanceType* domain_distances = new DistanceType[branching_];
604  int best_index = 0;
605  domain_distances[best_index] = distance_(vec, node->childs[best_index]->pivot, veclen_);
606  for (int i=1; i<branching_; ++i) {
607  domain_distances[i] = distance_(vec, node->childs[i]->pivot, veclen_);
608  if (domain_distances[i]<domain_distances[best_index]) {
609  best_index = i;
610  }
611  }
612  for (int i=0; i<branching_; ++i) {
613  if (i!=best_index) {
614  heap->insert(BranchSt(node->childs[i],domain_distances[i]));
615  }
616  }
617  delete[] domain_distances;
618  findNN<with_removed>(node->childs[best_index],result,vec, checks, maxChecks, heap, checked);
619  }
620  }
621 
622  void addPointToTree(NodePtr node, size_t index)
623  {
624  ElementType* point = points_[index];
625 
626  if (node->childs.empty()) { // leaf node
627  PointInfo pointInfo;
628  pointInfo.point = point;
629  pointInfo.index = index;
630  node->points.push_back(pointInfo);
631 
632  if (node->points.size()>=size_t(branching_)) {
633  std::vector<int> indices(node->points.size());
634 
635  for (size_t i=0;i<node->points.size();++i) {
636  indices[i] = node->points[i].index;
637  }
638  computeClustering(node, &indices[0], indices.size());
639  }
640  }
641  else {
642  // find the closest child
643  int closest = 0;
644  ElementType* center = node->childs[closest]->pivot;
645  DistanceType dist = distance_(center, point, veclen_);
646  for (size_t i=1;i<size_t(branching_);++i) {
647  center = node->childs[i]->pivot;
648  DistanceType crt_dist = distance_(center, point, veclen_);
649  if (crt_dist<dist) {
650  dist = crt_dist;
651  closest = i;
652  }
653  }
654  addPointToTree(node->childs[closest], index);
655  }
656  }
657 
659  {
660  BaseClass::swap(other);
661 
662  std::swap(tree_roots_, other.tree_roots_);
663  std::swap(pool_, other.pool_);
664  std::swap(memoryCounter_, other.memoryCounter_);
665  std::swap(branching_, other.branching_);
666  std::swap(trees_, other.trees_);
667  std::swap(centers_init_, other.centers_init_);
668  std::swap(leaf_max_size_, other.leaf_max_size_);
669  std::swap(chooseCenters_, other.chooseCenters_);
670  }
671 
672 private:
673 
677  std::vector<Node*> tree_roots_;
678 
687 
692 
698 
702  int trees_;
703 
708 
713 
718 
720 };
721 
722 }
723 
724 #endif /* FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */
d
#define NULL
HierarchicalClusteringIndex & operator=(HierarchicalClusteringIndex other)
std::map< std::string, any > IndexParams
Definition: params.h:51
flann_centers_init_t
Definition: defines.h:95
T get_param(const IndexParams &params, std::string name, const T &default_value)
Definition: params.h:95
#define SIZE_MAX
void findNeighborsWithRemoved(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams) const
void copyTree(NodePtr &dst, const NodePtr &src)
HierarchicalClusteringIndex(const IndexParams &index_params=HierarchicalClusteringIndexParams(), Distance d=Distance())
char * dst
Definition: lz4.h:354
size_t rows
Definition: matrix.h:72
void insert(const T &value)
Definition: heap.h:135
#define USING_BASECLASS_SYMBOLS
Definition: nn_index.h:897
void set(size_t index)
void addPointToTree(NodePtr node, size_t index)
size_t cols
Definition: matrix.h:73
void findNeighbors(ResultSet< DistanceType > &result, const ElementType *vec, const SearchParams &searchParams) const
void swap(HierarchicalClusteringIndex &other)
HierarchicalClusteringIndex(const HierarchicalClusteringIndex &other)
bool test(size_t index) const
flann_algorithm_t
Definition: defines.h:79
HierarchicalClusteringIndex(const Matrix< ElementType > &inputData, const IndexParams &index_params=HierarchicalClusteringIndexParams(), Distance d=Distance())
bool popMin(T &value)
Definition: heap.h:158
struct Index Index
Definition: sqlite3.c:8577
void addPoints(const Matrix< ElementType > &points, float rebuild_threshold=2)
Incrementally add points to the index.
void findNN(NodePtr node, ResultSet< DistanceType > &result, const ElementType *vec, int &checks, int maxChecks, Heap< BranchSt > *heap, DynamicBitset &checked) const
BranchStruct< NodePtr, DistanceType > BranchSt
void computeLabels(int *indices, int indices_length, int *centers, int centers_length, int *labels, DistanceType &cost)
HierarchicalClusteringIndexParams(int branching=32, flann_centers_init_t centers_init=FLANN_CENTERS_RANDOM, int trees=4, int leaf_max_size=100)
virtual bool full() const =0
static void freeIndex(sqlite3 *db, Index *p)
Definition: sqlite3.c:84627
virtual void addPoint(DistanceType dist, size_t index)=0
void computeClustering(NodePtr node, int *indices, int indices_length)


rtabmap
Author(s): Mathieu Labbe
autogenerated on Mon Dec 14 2020 03:34:59