lsh_table.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 /***********************************************************************
32  * Author: Vincent Rabaud
33  *************************************************************************/
34 
35 #ifndef RTABMAP_FLANN_LSH_TABLE_H_
36 #define RTABMAP_FLANN_LSH_TABLE_H_
37 
38 #include <algorithm>
39 #include <iostream>
40 #include <iomanip>
41 #include <limits.h>
42 // TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
43 #if RTFLANN_USE_UNORDERED_MAP
44 #include <unordered_map>
45 #else
46 #include <map>
47 #endif
48 #include <math.h>
49 #include <stddef.h>
50 
52 #include "rtflann/util/matrix.h"
53 
54 namespace rtflann
55 {
56 
57 namespace lsh
58 {
59 
61 
67 typedef unsigned int BucketKey;
68 
71 typedef std::vector<FeatureIndex> Bucket;
72 
74 
77 struct LshStats
78 {
79  std::vector<unsigned int> bucket_sizes_;
80  size_t n_buckets_;
88  std::vector<std::vector<unsigned int> > size_histogram_;
89 };
90 
96 inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
97 {
98  size_t w = 20;
99  out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
100  << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
101  << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
102  << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
103  << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
104  << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
105  << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
106 
107  // Display the histogram
108  out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
109  << std::setiosflags(std::ios::left);
110  for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
111  stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", ";
112 
113  return out;
114 }
115 
116 
118 
124 template<typename ElementType>
125 class LshTable
126 {
127 public:
130 #if RTFLANN_USE_UNORDERED_MAP
131  typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
132 #else
133  typedef std::map<BucketKey, Bucket> BucketsSpace;
134 #endif
135 
138  typedef std::vector<Bucket> BucketsSpeed;
139 
143  {
144  }
145 
151  LshTable(unsigned int /*feature_size*/, unsigned int /*key_size*/)
152  {
153  std::cerr << "LSH is not implemented for that type" << std::endl;
154  throw;
155  }
156 
161  void add(unsigned int value, const ElementType* feature)
162  {
163  // Add the value to the corresponding bucket
164  BucketKey key = getKey(feature);
165 
166  switch (speed_level_) {
167  case kArray:
168  // That means we get the buckets from an array
169  buckets_speed_[key].push_back(value);
170  break;
171  case kBitsetHash:
172  // That means we can check the bitset for the presence of a key
173  key_bitset_.set(key);
174  buckets_space_[key].push_back(value);
175  break;
176  case kHash:
177  {
178  // That means we have to check for the hash table for the presence of a key
179  buckets_space_[key].push_back(value);
180  break;
181  }
182  }
183  }
184 
188  void add(const std::vector< std::pair<size_t, ElementType*> >& features)
189  {
190 #if RTFLANN_USE_UNORDERED_MAP
191  buckets_space_.rehash((buckets_space_.size() + features.size()) * 1.2);
192 #endif
193  // Add the features to the table
194  for (size_t i = 0; i < features.size(); ++i) {
195  add(features[i].first, features[i].second);
196  }
197  // Now that the table is full, optimize it for speed/space
198  optimize();
199  }
200 
205  inline const Bucket* getBucketFromKey(BucketKey key) const
206  {
207  // Generate other buckets
208  switch (speed_level_) {
209  case kArray:
210  // That means we get the buckets from an array
211  return &buckets_speed_[key];
212  break;
213  case kBitsetHash:
214  // That means we can check the bitset for the presence of a key
215  if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
216  else return 0;
217  break;
218  case kHash:
219  {
220  // That means we have to check for the hash table for the presence of a key
221  BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
222  bucket_it = buckets_space_.find(key);
223  // Stop here if that bucket does not exist
224  if (bucket_it == bucket_end) return 0;
225  else return &bucket_it->second;
226  break;
227  }
228  }
229  return 0;
230  }
231 
234  size_t getKey(const ElementType* /*feature*/) const
235  {
236  std::cerr << "LSH is not implemented for that type" << std::endl;
237  throw;
238  return 1;
239  }
240 
244  LshStats getStats() const;
245 
246 private:
253  {
254  kArray, kBitsetHash, kHash
255  };
256 
259  void initialize(size_t key_size)
260  {
261  speed_level_ = kHash;
262  key_size_ = key_size;
263  }
264 
267  void optimize()
268  {
269  // If we are already using the fast storage, no need to do anything
270  if (speed_level_ == kArray) return;
271 
272  // Use an array if it will be more than half full
273  if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
274  speed_level_ = kArray;
275  // Fill the array version of it
276  buckets_speed_.resize(size_t(1) << key_size_);
277  for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
278 
279  // Empty the hash table
280  buckets_space_.clear();
281  return;
282  }
283 
284  // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two
285  // for the vector) or less than 512MB (key_size_ <= 30)
286  if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
287  >= size_t(size_t(1) << key_size_)) || (key_size_ <= 32)) {
288  speed_level_ = kBitsetHash;
289  key_bitset_.resize(size_t(1) << key_size_);
290  key_bitset_.reset();
291  // Try with the BucketsSpace
292  for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
293  }
294  else {
295  speed_level_ = kHash;
296  key_bitset_.clear();
297  }
298  }
299 
300  template<typename Archive>
301  void serialize(Archive& ar)
302  {
303  int val;
304  if (Archive::is_saving::value) {
305  val = (int)speed_level_;
306  }
307  ar & val;
308  if (Archive::is_loading::value) {
309  speed_level_ = (SpeedLevel) val;
310  }
311 
312  ar & key_size_;
313  ar & mask_;
314 
315  if (speed_level_==kArray) {
316  ar & buckets_speed_;
317  }
318  if (speed_level_==kBitsetHash || speed_level_==kHash) {
319  ar & buckets_space_;
320  }
321  if (speed_level_==kBitsetHash) {
322  ar & key_bitset_;
323  }
324  }
325  friend struct serialization::access;
326 
329  BucketsSpeed buckets_speed_;
330 
333  BucketsSpace buckets_space_;
334 
337 
342 
345  unsigned int key_size_;
346 
347  // Members only used for the unsigned char specialization
351  std::vector<size_t> mask_;
352 };
353 
355 // Specialization for unsigned char
356 
357 template<>
358 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
359 {
360  initialize(subsignature_size);
361  // Allocate the mask
362  mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
363 
364  // A bit brutal but fast to code
365  std::vector<size_t> indices(feature_size * CHAR_BIT);
366  for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = i;
367  std::random_shuffle(indices.begin(), indices.end());
368 
369  // Generate a random set of order of subsignature_size_ bits
370  for (unsigned int i = 0; i < key_size_; ++i) {
371  size_t index = indices[i];
372 
373  // Set that bit in the mask
374  size_t divisor = CHAR_BIT * sizeof(size_t);
375  size_t idx = index / divisor; //pick the right size_t index
376  mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset
377  }
378 
379  // Set to 1 if you want to display the mask for debug
380 #if 0
381  {
382  size_t bcount = 0;
383  BOOST_FOREACH(size_t mask_block, mask_){
384  out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
385  << std::endl;
386  bcount += __builtin_popcountll(mask_block);
387  }
388  out << "bit count : " << std::dec << bcount << std::endl;
389  out << "mask size : " << mask_.size() << std::endl;
390  return out;
391  }
392 #endif
393 }
394 
398 template<>
399 inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
400 {
401  // no need to check if T is dividable by sizeof(size_t) like in the Hamming
402  // distance computation as we have a mask
403  const size_t* feature_block_ptr = reinterpret_cast<const size_t*> (feature);
404 
405  // Figure out the subsignature of the feature
406  // Given the feature ABCDEF, and the mask 001011, the output will be
407  // 000CEF
408  size_t subsignature = 0;
409  size_t bit_index = 1;
410 
411  for (std::vector<size_t>::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) {
412  // get the mask and signature blocks
413  size_t feature_block = *feature_block_ptr;
414  size_t mask_block = *pmask_block;
415  while (mask_block) {
416  // Get the lowest set bit in the mask block
417  size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
418  // Add it to the current subsignature if necessary
419  subsignature += (feature_block & lowest_bit) ? bit_index : 0;
420  // Reset the bit in the mask block
421  mask_block ^= lowest_bit;
422  // increment the bit index for the subsignature
423  bit_index <<= 1;
424  }
425  // Check the next feature block
426  ++feature_block_ptr;
427  }
428  return subsignature;
429 }
430 
431 template<>
433 {
434  LshStats stats;
435  stats.bucket_size_mean_ = 0;
436  if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
437  stats.n_buckets_ = 0;
438  stats.bucket_size_median_ = 0;
439  stats.bucket_size_min_ = 0;
440  stats.bucket_size_max_ = 0;
441  return stats;
442  }
443 
444  if (!buckets_speed_.empty()) {
445  for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
446  stats.bucket_sizes_.push_back(pbucket->size());
447  stats.bucket_size_mean_ += pbucket->size();
448  }
449  stats.bucket_size_mean_ /= buckets_speed_.size();
450  stats.n_buckets_ = buckets_speed_.size();
451  }
452  else {
453  for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
454  stats.bucket_sizes_.push_back(x->second.size());
455  stats.bucket_size_mean_ += x->second.size();
456  }
457  stats.bucket_size_mean_ /= buckets_space_.size();
458  stats.n_buckets_ = buckets_space_.size();
459  }
460 
461  std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
462 
463  // BOOST_FOREACH(int size, stats.bucket_sizes_)
464  // std::cout << size << " ";
465  // std::cout << std::endl;
466  stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
467  stats.bucket_size_min_ = stats.bucket_sizes_.front();
468  stats.bucket_size_max_ = stats.bucket_sizes_.back();
469 
470  // TODO compute mean and std
471  /*float mean, stddev;
472  stats.bucket_size_mean_ = mean;
473  stats.bucket_size_std_dev = stddev;*/
474 
475  // Include a histogram of the buckets
476  unsigned int bin_start = 0;
477  unsigned int bin_end = 20;
478  bool is_new_bin = true;
479  for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
480  != end; )
481  if (*iterator < bin_end) {
482  if (is_new_bin) {
483  stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
484  stats.size_histogram_.back()[0] = bin_start;
485  stats.size_histogram_.back()[1] = bin_end - 1;
486  is_new_bin = false;
487  }
488  ++stats.size_histogram_.back()[2];
489  ++iterator;
490  }
491  else {
492  bin_start += 20;
493  bin_end += 20;
494  is_new_bin = true;
495  }
496 
497  return stats;
498 }
499 
500 // End the two namespaces
501 }
502 }
503 
505 
506 #endif /* FLANN_LSH_TABLE_H_ */
unsigned int key_size_
Definition: lsh_table.h:345
x
ROSCONSOLE_DECL void initialize()
std::ostream & operator<<(std::ostream &out, const LshStats &stats)
Definition: lsh_table.h:96
void add(unsigned int value, const ElementType *feature)
Definition: lsh_table.h:161
std::vector< unsigned int > bucket_sizes_
Definition: lsh_table.h:79
BucketsSpace buckets_space_
Definition: lsh_table.h:333
size_t getKey(const ElementType *) const
Definition: lsh_table.h:234
std::vector< std::vector< unsigned int > > size_histogram_
Definition: lsh_table.h:88
uint32_t FeatureIndex
Definition: lsh_table.h:64
LshTable(unsigned int, unsigned int)
Definition: lsh_table.h:151
void add(const std::vector< std::pair< size_t, ElementType *> > &features)
Definition: lsh_table.h:188
size_t bucket_size_median_
Definition: lsh_table.h:82
SpeedLevel speed_level_
Definition: lsh_table.h:336
detail::uint32 uint32_t
Definition: fwd.hpp:916
GLM_FUNC_DECL genType max(genType const &x, genType const &y)
void serialize(Archive &ar)
Definition: lsh_table.h:301
DynamicBitset key_bitset_
Definition: lsh_table.h:341
std::map< BucketKey, Bucket > BucketsSpace
Definition: lsh_table.h:133
size_t bucket_size_std_dev
Definition: lsh_table.h:85
unsigned int BucketKey
Definition: lsh_table.h:67
std::vector< size_t > mask_
Definition: lsh_table.h:351
std::vector< FeatureIndex > Bucket
Definition: lsh_table.h:71
void initialize(size_t key_size)
Definition: lsh_table.h:259
std::vector< Bucket > BucketsSpeed
Definition: lsh_table.h:138
const Bucket * getBucketFromKey(BucketKey key) const
Definition: lsh_table.h:205
GLM_FUNC_DECL genType ceil(genType const &x)
end
BucketsSpeed buckets_speed_
Definition: lsh_table.h:329
LshStats getStats() const


rtabmap
Author(s): Mathieu Labbe
autogenerated on Mon Jan 23 2023 03:37:28