Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef RTABMAP_FLANN_LSH_TABLE_H_
00036 #define RTABMAP_FLANN_LSH_TABLE_H_
00037
00038 #include <algorithm>
00039 #include <iostream>
00040 #include <iomanip>
00041 #include <limits.h>
00042
00043 #if RTFLANN_USE_UNORDERED_MAP
00044 #include <unordered_map>
00045 #else
00046 #include <map>
00047 #endif
00048 #include <math.h>
00049 #include <stddef.h>
00050
00051 #include "rtflann/util/dynamic_bitset.h"
00052 #include "rtflann/util/matrix.h"
00053
00054 namespace rtflann
00055 {
00056
00057 namespace lsh
00058 {
00059
00061
00064 typedef uint32_t FeatureIndex;
00067 typedef unsigned int BucketKey;
00068
00071 typedef std::vector<FeatureIndex> Bucket;
00072
00074
00077 struct LshStats
00078 {
00079 std::vector<unsigned int> bucket_sizes_;
00080 size_t n_buckets_;
00081 size_t bucket_size_mean_;
00082 size_t bucket_size_median_;
00083 size_t bucket_size_min_;
00084 size_t bucket_size_max_;
00085 size_t bucket_size_std_dev;
00088 std::vector<std::vector<unsigned int> > size_histogram_;
00089 };
00090
00096 inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
00097 {
00098 size_t w = 20;
00099 out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
00100 << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
00101 << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
00102 << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
00103 << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
00104 << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
00105 << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
00106
00107
00108 out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
00109 << std::setiosflags(std::ios::left);
00110 for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
00111 stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", ";
00112
00113 return out;
00114 }
00115
00116
00118
00124 template<typename ElementType>
00125 class LshTable
00126 {
00127 public:
00130 #if RTFLANN_USE_UNORDERED_MAP
00131 typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
00132 #else
00133 typedef std::map<BucketKey, Bucket> BucketsSpace;
00134 #endif
00135
00138 typedef std::vector<Bucket> BucketsSpeed;
00139
00142 LshTable()
00143 {
00144 }
00145
00151 LshTable(unsigned int , unsigned int )
00152 {
00153 std::cerr << "LSH is not implemented for that type" << std::endl;
00154 throw;
00155 }
00156
00161 void add(unsigned int value, const ElementType* feature)
00162 {
00163
00164 BucketKey key = getKey(feature);
00165
00166 switch (speed_level_) {
00167 case kArray:
00168
00169 buckets_speed_[key].push_back(value);
00170 break;
00171 case kBitsetHash:
00172
00173 key_bitset_.set(key);
00174 buckets_space_[key].push_back(value);
00175 break;
00176 case kHash:
00177 {
00178
00179 buckets_space_[key].push_back(value);
00180 break;
00181 }
00182 }
00183 }
00184
00188 void add(const std::vector< std::pair<size_t, ElementType*> >& features)
00189 {
00190 #if RTFLANN_USE_UNORDERED_MAP
00191 buckets_space_.rehash((buckets_space_.size() + features.size()) * 1.2);
00192 #endif
00193
00194 for (size_t i = 0; i < features.size(); ++i) {
00195 add(features[i].first, features[i].second);
00196 }
00197
00198 optimize();
00199 }
00200
00205 inline const Bucket* getBucketFromKey(BucketKey key) const
00206 {
00207
00208 switch (speed_level_) {
00209 case kArray:
00210
00211 return &buckets_speed_[key];
00212 break;
00213 case kBitsetHash:
00214
00215 if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
00216 else return 0;
00217 break;
00218 case kHash:
00219 {
00220
00221 BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
00222 bucket_it = buckets_space_.find(key);
00223
00224 if (bucket_it == bucket_end) return 0;
00225 else return &bucket_it->second;
00226 break;
00227 }
00228 }
00229 return 0;
00230 }
00231
00234 size_t getKey(const ElementType* ) const
00235 {
00236 std::cerr << "LSH is not implemented for that type" << std::endl;
00237 throw;
00238 return 1;
00239 }
00240
00244 LshStats getStats() const;
00245
00246 private:
00252 enum SpeedLevel
00253 {
00254 kArray, kBitsetHash, kHash
00255 };
00256
00259 void initialize(size_t key_size)
00260 {
00261 speed_level_ = kHash;
00262 key_size_ = key_size;
00263 }
00264
00267 void optimize()
00268 {
00269
00270 if (speed_level_ == kArray) return;
00271
00272
00273 if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
00274 speed_level_ = kArray;
00275
00276 buckets_speed_.resize(size_t(1) << key_size_);
00277 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
00278
00279
00280 buckets_space_.clear();
00281 return;
00282 }
00283
00284
00285
00286 if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
00287 >= size_t(size_t(1) << key_size_)) || (key_size_ <= 32)) {
00288 speed_level_ = kBitsetHash;
00289 key_bitset_.resize(size_t(1) << key_size_);
00290 key_bitset_.reset();
00291
00292 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
00293 }
00294 else {
00295 speed_level_ = kHash;
00296 key_bitset_.clear();
00297 }
00298 }
00299
00300 template<typename Archive>
00301 void serialize(Archive& ar)
00302 {
00303 int val;
00304 if (Archive::is_saving::value) {
00305 val = (int)speed_level_;
00306 }
00307 ar & val;
00308 if (Archive::is_loading::value) {
00309 speed_level_ = (SpeedLevel) val;
00310 }
00311
00312 ar & key_size_;
00313 ar & mask_;
00314
00315 if (speed_level_==kArray) {
00316 ar & buckets_speed_;
00317 }
00318 if (speed_level_==kBitsetHash || speed_level_==kHash) {
00319 ar & buckets_space_;
00320 }
00321 if (speed_level_==kBitsetHash) {
00322 ar & key_bitset_;
00323 }
00324 }
00325 friend struct serialization::access;
00326
00329 BucketsSpeed buckets_speed_;
00330
00333 BucketsSpace buckets_space_;
00334
00336 SpeedLevel speed_level_;
00337
00341 DynamicBitset key_bitset_;
00342
00345 unsigned int key_size_;
00346
00347
00351 std::vector<size_t> mask_;
00352 };
00353
00355
00356
00357 template<>
00358 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
00359 {
00360 initialize(subsignature_size);
00361
00362 mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
00363
00364
00365 std::vector<size_t> indices(feature_size * CHAR_BIT);
00366 for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = i;
00367 std::random_shuffle(indices.begin(), indices.end());
00368
00369
00370 for (unsigned int i = 0; i < key_size_; ++i) {
00371 size_t index = indices[i];
00372
00373
00374 size_t divisor = CHAR_BIT * sizeof(size_t);
00375 size_t idx = index / divisor;
00376 mask_[idx] |= size_t(1) << (index % divisor);
00377 }
00378
00379
00380 #if 0
00381 {
00382 size_t bcount = 0;
00383 BOOST_FOREACH(size_t mask_block, mask_){
00384 out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
00385 << std::endl;
00386 bcount += __builtin_popcountll(mask_block);
00387 }
00388 out << "bit count : " << std::dec << bcount << std::endl;
00389 out << "mask size : " << mask_.size() << std::endl;
00390 return out;
00391 }
00392 #endif
00393 }
00394
00398 template<>
00399 inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
00400 {
00401
00402
00403 const size_t* feature_block_ptr = reinterpret_cast<const size_t*> (feature);
00404
00405
00406
00407
00408 size_t subsignature = 0;
00409 size_t bit_index = 1;
00410
00411 for (std::vector<size_t>::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) {
00412
00413 size_t feature_block = *feature_block_ptr;
00414 size_t mask_block = *pmask_block;
00415 while (mask_block) {
00416
00417 size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
00418
00419 subsignature += (feature_block & lowest_bit) ? bit_index : 0;
00420
00421 mask_block ^= lowest_bit;
00422
00423 bit_index <<= 1;
00424 }
00425
00426 ++feature_block_ptr;
00427 }
00428 return subsignature;
00429 }
00430
00431 template<>
00432 inline LshStats LshTable<unsigned char>::getStats() const
00433 {
00434 LshStats stats;
00435 stats.bucket_size_mean_ = 0;
00436 if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
00437 stats.n_buckets_ = 0;
00438 stats.bucket_size_median_ = 0;
00439 stats.bucket_size_min_ = 0;
00440 stats.bucket_size_max_ = 0;
00441 return stats;
00442 }
00443
00444 if (!buckets_speed_.empty()) {
00445 for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
00446 stats.bucket_sizes_.push_back(pbucket->size());
00447 stats.bucket_size_mean_ += pbucket->size();
00448 }
00449 stats.bucket_size_mean_ /= buckets_speed_.size();
00450 stats.n_buckets_ = buckets_speed_.size();
00451 }
00452 else {
00453 for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
00454 stats.bucket_sizes_.push_back(x->second.size());
00455 stats.bucket_size_mean_ += x->second.size();
00456 }
00457 stats.bucket_size_mean_ /= buckets_space_.size();
00458 stats.n_buckets_ = buckets_space_.size();
00459 }
00460
00461 std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
00462
00463
00464
00465
00466 stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
00467 stats.bucket_size_min_ = stats.bucket_sizes_.front();
00468 stats.bucket_size_max_ = stats.bucket_sizes_.back();
00469
00470
00471
00472
00473
00474
00475
00476 unsigned int bin_start = 0;
00477 unsigned int bin_end = 20;
00478 bool is_new_bin = true;
00479 for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
00480 != end; )
00481 if (*iterator < bin_end) {
00482 if (is_new_bin) {
00483 stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
00484 stats.size_histogram_.back()[0] = bin_start;
00485 stats.size_histogram_.back()[1] = bin_end - 1;
00486 is_new_bin = false;
00487 }
00488 ++stats.size_histogram_.back()[2];
00489 ++iterator;
00490 }
00491 else {
00492 bin_start += 20;
00493 bin_end += 20;
00494 is_new_bin = true;
00495 }
00496
00497 return stats;
00498 }
00499
00500
00501 }
00502 }
00503
00505
00506 #endif