lsh_table.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 /***********************************************************************
32  * Author: Vincent Rabaud
33  *************************************************************************/
34 
35 #ifndef RTABMAP_FLANN_LSH_TABLE_H_
36 #define RTABMAP_FLANN_LSH_TABLE_H_
37 
38 #include <algorithm>
39 #include <iostream>
40 #include <iomanip>
41 #include <limits.h>
42 // TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
43 #if RTFLANN_USE_UNORDERED_MAP
44 #include <unordered_map>
45 #else
46 #include <map>
47 #endif
48 #include <math.h>
49 #include <stddef.h>
50 #include <random>
51 
53 #include "rtflann/util/matrix.h"
54 
55 namespace rtflann
56 {
57 
58 namespace lsh
59 {
60 
62 
65 typedef uint32_t FeatureIndex;
68 typedef unsigned int BucketKey;
69 
72 typedef std::vector<FeatureIndex> Bucket;
73 
75 
78 struct LshStats
79 {
80  std::vector<unsigned int> bucket_sizes_;
81  size_t n_buckets_;
82  size_t bucket_size_mean_;
83  size_t bucket_size_median_;
84  size_t bucket_size_min_;
85  size_t bucket_size_max_;
86  size_t bucket_size_std_dev;
89  std::vector<std::vector<unsigned int> > size_histogram_;
90 };
91 
97 inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
98 {
99  size_t w = 20;
100  out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
101  << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
102  << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
103  << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
104  << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
105  << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
106  << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
107 
108  // Display the histogram
109  out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
110  << std::setiosflags(std::ios::left);
111  for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
112  stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", ";
113 
114  return out;
115 }
116 
117 
119 
125 template<typename ElementType>
126 class LshTable
127 {
128 public:
131 #if RTFLANN_USE_UNORDERED_MAP
132  typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
133 #else
134  typedef std::map<BucketKey, Bucket> BucketsSpace;
135 #endif
136 
139  typedef std::vector<Bucket> BucketsSpeed;
140 
144  {
145  }
146 
152  LshTable(unsigned int /*feature_size*/, unsigned int /*key_size*/)
153  {
154  std::cerr << "LSH is not implemented for that type" << std::endl;
155  throw;
156  }
157 
162  void add(unsigned int value, const ElementType* feature)
163  {
164  // Add the value to the corresponding bucket
165  BucketKey key = getKey(feature);
166 
167  switch (speed_level_) {
168  case kArray:
169  // That means we get the buckets from an array
170  buckets_speed_[key].push_back(value);
171  break;
172  case kBitsetHash:
173  // That means we can check the bitset for the presence of a key
175  buckets_space_[key].push_back(value);
176  break;
177  case kHash:
178  {
179  // That means we have to check for the hash table for the presence of a key
180  buckets_space_[key].push_back(value);
181  break;
182  }
183  }
184  }
185 
189  void add(const std::vector< std::pair<size_t, ElementType*> >& features)
190  {
191 #if RTFLANN_USE_UNORDERED_MAP
192  buckets_space_.rehash((buckets_space_.size() + features.size()) * 1.2);
193 #endif
194  // Add the features to the table
195  for (size_t i = 0; i < features.size(); ++i) {
196  add(features[i].first, features[i].second);
197  }
198  // Now that the table is full, optimize it for speed/space
200  }
201 
206  inline const Bucket* getBucketFromKey(BucketKey key) const
207  {
208  // Generate other buckets
209  switch (speed_level_) {
210  case kArray:
211  // That means we get the buckets from an array
212  return &buckets_speed_[key];
213  break;
214  case kBitsetHash:
215  // That means we can check the bitset for the presence of a key
216  if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
217  else return 0;
218  break;
219  case kHash:
220  {
221  // That means we have to check for the hash table for the presence of a key
222  BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
223  bucket_it = buckets_space_.find(key);
224  // Stop here if that bucket does not exist
225  if (bucket_it == bucket_end) return 0;
226  else return &bucket_it->second;
227  break;
228  }
229  }
230  return 0;
231  }
232 
235  size_t getKey(const ElementType* /*feature*/) const
236  {
237  std::cerr << "LSH is not implemented for that type" << std::endl;
238  throw;
239  return 1;
240  }
241 
245  LshStats getStats() const;
246 
247 private:
253  enum SpeedLevel
254  {
256  };
257 
260  void initialize(size_t key_size)
261  {
263  key_size_ = key_size;
264  }
265 
268  void optimize()
269  {
270  // If we are already using the fast storage, no need to do anything
271  if (speed_level_ == kArray) return;
272 
273  // Use an array if it will be more than half full
274  if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
276  // Fill the array version of it
277  buckets_speed_.resize(size_t(1) << key_size_);
278  for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
279 
280  // Empty the hash table
281  buckets_space_.clear();
282  return;
283  }
284 
285  // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two
286  // for the vector) or less than 512MB (key_size_ <= 30)
287  if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
288  >= size_t(size_t(1) << key_size_)) || (key_size_ <= 32)) {
290  key_bitset_.resize(size_t(1) << key_size_);
291  key_bitset_.reset();
292  // Try with the BucketsSpace
293  for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
294  }
295  else {
297  key_bitset_.clear();
298  }
299  }
300 
301  template<typename Archive>
302  void serialize(Archive& ar)
303  {
304  int val;
305  if (Archive::is_saving::value) {
306  val = (int)speed_level_;
307  }
308  ar & val;
309  if (Archive::is_loading::value) {
310  speed_level_ = (SpeedLevel) val;
311  }
312 
313  ar & key_size_;
314  ar & mask_;
315 
316  if (speed_level_==kArray) {
317  ar & buckets_speed_;
318  }
321  }
322  if (speed_level_==kBitsetHash) {
323  ar & key_bitset_;
324  }
325  }
326  friend struct serialization::access;
327 
331 
335 
338 
342  DynamicBitset key_bitset_;
343 
346  unsigned int key_size_;
347 
348  // Members only used for the unsigned char specialization
352  std::vector<size_t> mask_;
353 };
354 
356 // Specialization for unsigned char
357 
358 template<>
359 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
360 {
361  initialize(subsignature_size);
362  // Allocate the mask
363  mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
364 
365  // A bit brutal but fast to code
366  std::vector<size_t> indices(feature_size * CHAR_BIT);
367  for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = i;
368  std::random_device rd;
369  std::mt19937 g(rd());
370  std::shuffle(indices.begin(), indices.end(), g);
371 
372  // Generate a random set of order of subsignature_size_ bits
373  for (unsigned int i = 0; i < key_size_; ++i) {
374  size_t index = indices[i];
375 
376  // Set that bit in the mask
377  size_t divisor = CHAR_BIT * sizeof(size_t);
378  size_t idx = index / divisor; //pick the right size_t index
379  mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset
380  }
381 
382  // Set to 1 if you want to display the mask for debug
383 #if 0
384  {
385  size_t bcount = 0;
386  BOOST_FOREACH(size_t mask_block, mask_){
387  out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
388  << std::endl;
389  bcount += __builtin_popcountll(mask_block);
390  }
391  out << "bit count : " << std::dec << bcount << std::endl;
392  out << "mask size : " << mask_.size() << std::endl;
393  return out;
394  }
395 #endif
396 }
397 
401 template<>
402 inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
403 {
404  // no need to check if T is dividable by sizeof(size_t) like in the Hamming
405  // distance computation as we have a mask
406  const size_t* feature_block_ptr = reinterpret_cast<const size_t*> (feature);
407 
408  // Figure out the subsignature of the feature
409  // Given the feature ABCDEF, and the mask 001011, the output will be
410  // 000CEF
411  size_t subsignature = 0;
412  size_t bit_index = 1;
413 
414  for (std::vector<size_t>::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) {
415  // get the mask and signature blocks
416  size_t feature_block = *feature_block_ptr;
417  size_t mask_block = *pmask_block;
418  while (mask_block) {
419  // Get the lowest set bit in the mask block
420  size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
421  // Add it to the current subsignature if necessary
422  subsignature += (feature_block & lowest_bit) ? bit_index : 0;
423  // Reset the bit in the mask block
424  mask_block ^= lowest_bit;
425  // increment the bit index for the subsignature
426  bit_index <<= 1;
427  }
428  // Check the next feature block
429  ++feature_block_ptr;
430  }
431  return subsignature;
432 }
433 
434 template<>
435 inline LshStats LshTable<unsigned char>::getStats() const
436 {
437  LshStats stats;
438  stats.bucket_size_mean_ = 0;
439  if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
440  stats.n_buckets_ = 0;
441  stats.bucket_size_median_ = 0;
442  stats.bucket_size_min_ = 0;
443  stats.bucket_size_max_ = 0;
444  return stats;
445  }
446 
447  if (!buckets_speed_.empty()) {
448  for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
449  stats.bucket_sizes_.push_back(pbucket->size());
450  stats.bucket_size_mean_ += pbucket->size();
451  }
452  stats.bucket_size_mean_ /= buckets_speed_.size();
453  stats.n_buckets_ = buckets_speed_.size();
454  }
455  else {
456  for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
457  stats.bucket_sizes_.push_back(x->second.size());
458  stats.bucket_size_mean_ += x->second.size();
459  }
460  stats.bucket_size_mean_ /= buckets_space_.size();
461  stats.n_buckets_ = buckets_space_.size();
462  }
463 
464  std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
465 
466  // BOOST_FOREACH(int size, stats.bucket_sizes_)
467  // std::cout << size << " ";
468  // std::cout << std::endl;
469  stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
470  stats.bucket_size_min_ = stats.bucket_sizes_.front();
471  stats.bucket_size_max_ = stats.bucket_sizes_.back();
472 
473  // TODO compute mean and std
474  /*float mean, stddev;
475  stats.bucket_size_mean_ = mean;
476  stats.bucket_size_std_dev = stddev;*/
477 
478  // Include a histogram of the buckets
479  unsigned int bin_start = 0;
480  unsigned int bin_end = 20;
481  bool is_new_bin = true;
482  for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
483  != end; )
484  if (*iterator < bin_end) {
485  if (is_new_bin) {
486  stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
487  stats.size_histogram_.back()[0] = bin_start;
488  stats.size_histogram_.back()[1] = bin_end - 1;
489  is_new_bin = false;
490  }
491  ++stats.size_histogram_.back()[2];
492  ++iterator;
493  }
494  else {
495  bin_start += 20;
496  bin_end += 20;
497  is_new_bin = true;
498  }
499 
500  return stats;
501 }
502 
503 // End the two namespaces
504 }
505 }
506 
508 
509 #endif /* FLANN_LSH_TABLE_H_ */
rtflann::lsh::LshTable::SpeedLevel
SpeedLevel
Definition: lsh_table.h:313
rtflann::lsh::LshTable::optimize
void optimize()
Definition: lsh_table.h:328
w
RowVector3d w
int
int
rtflann::lsh::LshTable::kBitsetHash
@ kBitsetHash
Definition: lsh_table.h:315
rtflann::DynamicBitset::reset
void reset()
Definition: dynamic_bitset.h:147
rtflann::lsh::LshTable::key_size_
unsigned int key_size_
Definition: lsh_table.h:406
rtflann::lsh::LshStats::bucket_size_min_
size_t bucket_size_min_
Definition: lsh_table.h:144
rtflann::lsh::LshStats::n_buckets_
size_t n_buckets_
Definition: lsh_table.h:141
rtflann::DynamicBitset::clear
void clear()
Definition: dynamic_bitset.h:132
rtflann::lsh::LshTable::initialize
void initialize(size_t key_size)
Definition: lsh_table.h:320
uint32_t
::uint32_t uint32_t
rtflann::lsh::FeatureIndex
uint32_t FeatureIndex
Definition: lsh_table.h:125
rtflann::lsh::LshTable::getBucketFromKey
const Bucket * getBucketFromKey(BucketKey key) const
Definition: lsh_table.h:266
rtflann::lsh::LshTable::kArray
@ kArray
Definition: lsh_table.h:315
iterator
rtflann::lsh::LshTable::key_bitset_
DynamicBitset key_bitset_
Definition: lsh_table.h:402
rtflann::lsh::operator<<
std::ostream & operator<<(std::ostream &out, const LshStats &stats)
Definition: lsh_table.h:157
dynamic_bitset.h
rtflann::lsh::LshTable
Definition: lsh_table.h:186
rtflann::lsh::LshTable::buckets_space_
BucketsSpace buckets_space_
Definition: lsh_table.h:394
rtflann::lsh::LshTable::LshTable
LshTable()
Definition: lsh_table.h:203
indices
indices
rtflann::DynamicBitset::test
bool test(size_t index) const
Definition: dynamic_bitset.h:199
rtflann::DynamicBitset::resize
void resize(size_t size)
Definition: dynamic_bitset.h:174
rtflann::lsh::LshStats::bucket_size_mean_
size_t bucket_size_mean_
Definition: lsh_table.h:142
rtflann::lsh::Bucket
std::vector< FeatureIndex > Bucket
Definition: lsh_table.h:132
matrix.h
rtflann::lsh::LshStats::bucket_size_max_
size_t bucket_size_max_
Definition: lsh_table.h:145
rtflann::lsh::LshTable::BucketsSpace
std::map< BucketKey, Bucket > BucketsSpace
Definition: lsh_table.h:194
g
float g
stats
bool stats
rtflann::lsh::LshTable::getKey
size_t getKey(const ElementType *) const
Definition: lsh_table.h:295
first
constexpr int first(int i)
glm::ceil
GLM_FUNC_DECL genType ceil(genType const &x)
glm::max
GLM_FUNC_DECL genType max(genType const &x, genType const &y)
rtflann::lsh::LshStats::bucket_sizes_
std::vector< unsigned int > bucket_sizes_
Definition: lsh_table.h:140
rtflann::lsh::LshTable::speed_level_
SpeedLevel speed_level_
Definition: lsh_table.h:397
x
x
size_t
std::size_t size_t
out
std::ofstream out("Result.txt")
rtflann::lsh::LshTable::mask_
std::vector< size_t > mask_
Definition: lsh_table.h:412
key
const gtsam::Symbol key( 'X', 0)
rtflann::lsh::LshStats::size_histogram_
std::vector< std::vector< unsigned int > > size_histogram_
Definition: lsh_table.h:149
rtflann::DynamicBitset::set
void set(size_t index)
Definition: dynamic_bitset.h:183
rtflann::lsh::LshTable::getStats
LshStats getStats() const
rtflann::lsh::LshTable::BucketsSpeed
std::vector< Bucket > BucketsSpeed
Definition: lsh_table.h:199
rtflann::lsh::LshTable::buckets_speed_
BucketsSpeed buckets_speed_
Definition: lsh_table.h:390
rtflann::lsh::LshTable::serialize
void serialize(Archive &ar)
Definition: lsh_table.h:362
rtflann::lsh::BucketKey
unsigned int BucketKey
Definition: lsh_table.h:128
rtflann::lsh::LshStats::bucket_size_median_
size_t bucket_size_median_
Definition: lsh_table.h:143
scan_step::second
@ second
rtflann::lsh::LshTable::kHash
@ kHash
Definition: lsh_table.h:315
rtflann::lsh::LshStats::bucket_size_std_dev
size_t bucket_size_std_dev
Definition: lsh_table.h:146
rtflann
Definition: all_indices.h:49
value
value
i
int i
initialize
Values initialize(const NonlinearFactorGraph &graph, bool useOdometricPath)
rtflann::lsh::LshTable::add
void add(unsigned int value, const ElementType *feature)
Definition: lsh_table.h:222


rtabmap
Author(s): Mathieu Labbe
autogenerated on Thu Jul 25 2024 02:50:11