00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #ifndef RTABMAP_FLANN_AUTOTUNED_INDEX_H_
00031 #define RTABMAP_FLANN_AUTOTUNED_INDEX_H_
00032
00033 #include "rtflann/general.h"
00034 #include "rtflann/algorithms/nn_index.h"
00035 #include "rtflann/nn/ground_truth.h"
00036 #include "rtflann/nn/index_testing.h"
00037 #include "rtflann/util/sampling.h"
00038 #include "rtflann/algorithms/kdtree_index.h"
00039 #include "rtflann/algorithms/kdtree_single_index.h"
00040 #include "rtflann/algorithms/kmeans_index.h"
00041 #include "rtflann/algorithms/composite_index.h"
00042 #include "rtflann/algorithms/linear_index.h"
00043 #include "rtflann/util/logger.h"
00044
00045
00046 namespace rtflann
00047 {
00048
00049 template<typename Distance>
00050 inline NNIndex<Distance>*
00051 create_index_by_type(const flann_algorithm_t index_type,
00052 const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance = Distance());
00053
00054
00055 struct AutotunedIndexParams : public IndexParams
00056 {
00057 AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
00058 {
00059 (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
00060
00061 (*this)["target_precision"] = target_precision;
00062
00063 (*this)["build_weight"] = build_weight;
00064
00065 (*this)["memory_weight"] = memory_weight;
00066
00067 (*this)["sample_fraction"] = sample_fraction;
00068 }
00069 };
00070
00071
00072 template <typename Distance>
00073 class AutotunedIndex : public NNIndex<Distance>
00074 {
00075 public:
00076 typedef typename Distance::ElementType ElementType;
00077 typedef typename Distance::ResultType DistanceType;
00078
00079 typedef NNIndex<Distance> BaseClass;
00080
00081 typedef AutotunedIndex<Distance> IndexType;
00082
00083 typedef bool needs_kdtree_distance;
00084
00085 AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
00086 BaseClass(params, d), bestIndex_(NULL), speedup_(0), dataset_(inputData)
00087 {
00088 target_precision_ = get_param(params, "target_precision",0.8f);
00089 build_weight_ = get_param(params,"build_weight", 0.01f);
00090 memory_weight_ = get_param(params, "memory_weight", 0.0f);
00091 sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
00092 }
00093
00094 AutotunedIndex(const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
00095 BaseClass(params, d), bestIndex_(NULL), speedup_(0)
00096 {
00097 target_precision_ = get_param(params, "target_precision",0.8f);
00098 build_weight_ = get_param(params,"build_weight", 0.01f);
00099 memory_weight_ = get_param(params, "memory_weight", 0.0f);
00100 sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
00101 }
00102
00103 AutotunedIndex(const AutotunedIndex& other) : BaseClass(other),
00104 bestParams_(other.bestParams_),
00105 bestSearchParams_(other.bestSearchParams_),
00106 speedup_(other.speedup_),
00107 dataset_(other.dataset_),
00108 target_precision_(other.target_precision_),
00109 build_weight_(other.build_weight_),
00110 memory_weight_(other.memory_weight_),
00111 sample_fraction_(other.sample_fraction_)
00112 {
00113 bestIndex_ = other.bestIndex_->clone();
00114 }
00115
00116 AutotunedIndex& operator=(AutotunedIndex other)
00117 {
00118 this->swap(other);
00119 return * this;
00120 }
00121
00122 virtual ~AutotunedIndex()
00123 {
00124 delete bestIndex_;
00125 }
00126
00127 BaseClass* clone() const
00128 {
00129 return new AutotunedIndex(*this);
00130 }
00131
00135 void buildIndex()
00136 {
00137 bestParams_ = estimateBuildParams();
00138 Logger::info("----------------------------------------------------\n");
00139 Logger::info("Autotuned parameters:\n");
00140 if (Logger::getLevel()>=FLANN_LOG_INFO)
00141 print_params(bestParams_);
00142 Logger::info("----------------------------------------------------\n");
00143
00144 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
00145 bestIndex_ = create_index_by_type(index_type, dataset_, bestParams_, distance_);
00146 bestIndex_->buildIndex();
00147 speedup_ = estimateSearchParams(bestSearchParams_);
00148 Logger::info("----------------------------------------------------\n");
00149 Logger::info("Search parameters:\n");
00150 if (Logger::getLevel()>=FLANN_LOG_INFO)
00151 print_params(bestSearchParams_);
00152 Logger::info("----------------------------------------------------\n");
00153 bestParams_["search_params"] = bestSearchParams_;
00154 bestParams_["speedup"] = speedup_;
00155 }
00156
00157 void buildIndex(const Matrix<ElementType>& dataset)
00158 {
00159 dataset_ = dataset;
00160 this->buildIndex();
00161 }
00162
00163
00164 void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2)
00165 {
00166 if (bestIndex_) {
00167 bestIndex_->addPoints(points, rebuild_threshold);
00168 }
00169 }
00170
00171 void removePoint(size_t id)
00172 {
00173 if (bestIndex_) {
00174 bestIndex_->removePoint(id);
00175 }
00176 }
00177
00178
00179 template<typename Archive>
00180 void serialize(Archive& ar)
00181 {
00182 ar.setObject(this);
00183
00184 ar & *static_cast<NNIndex<Distance>*>(this);
00185
00186 ar & target_precision_;
00187 ar & build_weight_;
00188 ar & memory_weight_;
00189 ar & sample_fraction_;
00190
00191 flann_algorithm_t index_type;
00192 if (Archive::is_saving::value) {
00193 index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
00194 }
00195 ar & index_type;
00196 ar & bestSearchParams_.checks;
00197
00198 if (Archive::is_loading::value) {
00199 bestParams_["algorithm"] = index_type;
00200
00201 index_params_["algorithm"] = getType();
00202 index_params_["target_precision_"] = target_precision_;
00203 index_params_["build_weight_"] = build_weight_;
00204 index_params_["memory_weight_"] = memory_weight_;
00205 index_params_["sample_fraction_"] = sample_fraction_;
00206 }
00207 }
00208
00209 void saveIndex(FILE* stream)
00210 {
00211 {
00212 serialization::SaveArchive sa(stream);
00213 sa & *this;
00214 }
00215
00216 bestIndex_->saveIndex(stream);
00217 }
00218
00219 void loadIndex(FILE* stream)
00220 {
00221 {
00222 serialization::LoadArchive la(stream);
00223 la & *this;
00224 }
00225
00226 IndexParams params;
00227 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
00228 bestIndex_ = create_index_by_type<Distance>((flann_algorithm_t)index_type, dataset_, params, distance_);
00229 bestIndex_->loadIndex(stream);
00230 }
00231
00232 int knnSearch(const Matrix<ElementType>& queries,
00233 Matrix<size_t>& indices,
00234 Matrix<DistanceType>& dists,
00235 size_t knn,
00236 const SearchParams& params) const
00237 {
00238 if (params.checks == FLANN_CHECKS_AUTOTUNED) {
00239 return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_);
00240 }
00241 else {
00242 return bestIndex_->knnSearch(queries, indices, dists, knn, params);
00243 }
00244 }
00245
00246 int knnSearch(const Matrix<ElementType>& queries,
00247 std::vector< std::vector<size_t> >& indices,
00248 std::vector<std::vector<DistanceType> >& dists,
00249 size_t knn,
00250 const SearchParams& params) const
00251 {
00252 if (params.checks == FLANN_CHECKS_AUTOTUNED) {
00253 return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_);
00254 }
00255 else {
00256 return bestIndex_->knnSearch(queries, indices, dists, knn, params);
00257 }
00258
00259 }
00260
00261 int radiusSearch(const Matrix<ElementType>& queries,
00262 Matrix<size_t>& indices,
00263 Matrix<DistanceType>& dists,
00264 DistanceType radius,
00265 const SearchParams& params) const
00266 {
00267 if (params.checks == FLANN_CHECKS_AUTOTUNED) {
00268 return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_);
00269 }
00270 else {
00271 return bestIndex_->radiusSearch(queries, indices, dists, radius, params);
00272 }
00273 }
00274
00275 int radiusSearch(const Matrix<ElementType>& queries,
00276 std::vector< std::vector<size_t> >& indices,
00277 std::vector<std::vector<DistanceType> >& dists,
00278 DistanceType radius,
00279 const SearchParams& params) const
00280 {
00281 if (params.checks == FLANN_CHECKS_AUTOTUNED) {
00282 return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_);
00283 }
00284 else {
00285 return bestIndex_->radiusSearch(queries, indices, dists, radius, params);
00286 }
00287 }
00288
00289
00290
00294 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const
00295 {
00296
00297 assert(false);
00298 }
00299
00300 IndexParams getParameters() const
00301 {
00302 return bestParams_;
00303 }
00304
00305 FLANN_DEPRECATED SearchParams getSearchParameters() const
00306 {
00307 return bestSearchParams_;
00308 }
00309
00310 FLANN_DEPRECATED float getSpeedup() const
00311 {
00312 return speedup_;
00313 }
00314
00315
00319 size_t size() const
00320 {
00321 return bestIndex_->size();
00322 }
00323
00327 size_t veclen() const
00328 {
00329 return bestIndex_->veclen();
00330 }
00331
00335 int usedMemory() const
00336 {
00337 return bestIndex_->usedMemory();
00338 }
00339
00343 flann_algorithm_t getType() const
00344 {
00345 return FLANN_INDEX_AUTOTUNED;
00346 }
00347
00348 protected:
00349 void buildIndexImpl()
00350 {
00351
00352 }
00353
00354 void freeIndex()
00355 {
00356
00357 }
00358
00359 private:
00360
00361 struct CostData
00362 {
00363 float searchTimeCost;
00364 float buildTimeCost;
00365 float memoryCost;
00366 float totalCost;
00367 IndexParams params;
00368 };
00369
00370 void evaluate_kmeans(CostData& cost)
00371 {
00372 StartStopTimer t;
00373 int checks;
00374 const int nn = 1;
00375
00376 Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
00377 get_param<int>(cost.params,"iterations"),
00378 get_param<int>(cost.params,"branching"));
00379 KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
00380
00381 t.start();
00382 kmeans.buildIndex();
00383 t.stop();
00384 float buildTime = (float)t.value;
00385
00386
00387 float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
00388
00389 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
00390 cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
00391 cost.searchTimeCost = searchTime;
00392 cost.buildTimeCost = buildTime;
00393 Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
00394 }
00395
00396
00397 void evaluate_kdtree(CostData& cost)
00398 {
00399 StartStopTimer t;
00400 int checks;
00401 const int nn = 1;
00402
00403 Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
00404 KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
00405
00406 t.start();
00407 kdtree.buildIndex();
00408 t.stop();
00409 float buildTime = (float)t.value;
00410
00411
00412 float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
00413
00414 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
00415 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
00416 cost.searchTimeCost = searchTime;
00417 cost.buildTimeCost = buildTime;
00418 Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
00419 }
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470 void optimizeKMeans(std::vector<CostData>& costs)
00471 {
00472 Logger::info("KMEANS, Step 1: Exploring parameter space\n");
00473
00474
00475 int maxIterations[] = { 1, 5, 10, 15 };
00476 int branchingFactors[] = { 16, 32, 64, 128, 256 };
00477
00478 int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
00479 costs.reserve(costs.size() + kmeansParamSpaceSize);
00480
00481
00482 for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
00483 for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
00484 CostData cost;
00485 cost.params["algorithm"] = FLANN_INDEX_KMEANS;
00486 cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
00487 cost.params["iterations"] = maxIterations[i];
00488 cost.params["branching"] = branchingFactors[j];
00489
00490 evaluate_kmeans(cost);
00491 costs.push_back(cost);
00492 }
00493 }
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515 }
00516
00517
00518 void optimizeKDTree(std::vector<CostData>& costs)
00519 {
00520 Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
00521
00522
00523 int testTrees[] = { 1, 4, 8, 16, 32 };
00524
00525
00526 for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
00527 CostData cost;
00528 cost.params["algorithm"] = FLANN_INDEX_KDTREE;
00529 cost.params["trees"] = testTrees[i];
00530
00531 evaluate_kdtree(cost);
00532 costs.push_back(cost);
00533 }
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553 }
00554
00560 IndexParams estimateBuildParams()
00561 {
00562 std::vector<CostData> costs;
00563
00564 int sampleSize = int(sample_fraction_ * dataset_.rows);
00565 int testSampleSize = std::min(sampleSize / 10, 1000);
00566
00567 Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
00568
00569
00570
00571 if (testSampleSize < 10) {
00572 Logger::info("Choosing linear, dataset too small\n");
00573 return LinearIndexParams();
00574 }
00575
00576
00577 sampledDataset_ = random_sample(dataset_, sampleSize);
00578
00579 testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
00580
00581
00582 Logger::info("Computing ground truth... \n");
00583 gt_matches_ = Matrix<size_t>(new size_t[testDataset_.rows], testDataset_.rows, 1);
00584 StartStopTimer t;
00585 int repeats = 0;
00586 t.reset();
00587 while (t.value<0.2) {
00588 repeats++;
00589 t.start();
00590 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
00591 t.stop();
00592 }
00593
00594 CostData linear_cost;
00595 linear_cost.searchTimeCost = (float)t.value/repeats;
00596 linear_cost.buildTimeCost = 0;
00597 linear_cost.memoryCost = 0;
00598 linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
00599
00600 costs.push_back(linear_cost);
00601
00602
00603 Logger::info("Autotuning parameters...\n");
00604
00605 optimizeKMeans(costs);
00606 optimizeKDTree(costs);
00607
00608 float bestTimeCost = costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost;
00609 for (size_t i = 0; i < costs.size(); ++i) {
00610 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
00611 Logger::debug("Time cost: %g\n", timeCost);
00612 if (timeCost < bestTimeCost) {
00613 bestTimeCost = timeCost;
00614 }
00615 }
00616 Logger::debug("Best time cost: %g\n", bestTimeCost);
00617
00618 IndexParams bestParams = costs[0].params;
00619 if (bestTimeCost > 0) {
00620 float bestCost = (costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost) / bestTimeCost;
00621 for (size_t i = 0; i < costs.size(); ++i) {
00622 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
00623 memory_weight_ * costs[i].memoryCost;
00624 Logger::debug("Cost: %g\n", crtCost);
00625 if (crtCost < bestCost) {
00626 bestCost = crtCost;
00627 bestParams = costs[i].params;
00628 }
00629 }
00630 Logger::debug("Best cost: %g\n", bestCost);
00631 }
00632
00633 delete[] gt_matches_.ptr();
00634 delete[] testDataset_.ptr();
00635 delete[] sampledDataset_.ptr();
00636
00637 return bestParams;
00638 }
00639
00640
00641
00647 float estimateSearchParams(SearchParams& searchParams)
00648 {
00649 const int nn = 1;
00650 const size_t SAMPLE_COUNT = 1000;
00651
00652 assert(bestIndex_ != NULL);
00653
00654 float speedup = 0;
00655
00656 int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
00657 if (samples > 0) {
00658 Matrix<ElementType> testDataset = random_sample(dataset_, samples);
00659
00660 Logger::info("Computing ground truth\n");
00661
00662
00663 Matrix<size_t> gt_matches(new size_t[testDataset.rows], testDataset.rows, 1);
00664 StartStopTimer t;
00665 int repeats = 0;
00666 t.reset();
00667 while (t.value<0.2) {
00668 repeats++;
00669 t.start();
00670 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
00671 t.stop();
00672 }
00673 float linear = (float)t.value/repeats;
00674
00675 int checks;
00676 Logger::info("Estimating number of checks\n");
00677
00678 float searchTime;
00679 float cb_index;
00680 if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
00681 Logger::info("KMeans algorithm, estimating cluster border factor\n");
00682 KMeansIndex<Distance>* kmeans = static_cast<KMeansIndex<Distance>*>(bestIndex_);
00683 float bestSearchTime = -1;
00684 float best_cb_index = -1;
00685 int best_checks = -1;
00686 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
00687 kmeans->set_cb_index(cb_index);
00688 searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
00689 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
00690 bestSearchTime = searchTime;
00691 best_cb_index = cb_index;
00692 best_checks = checks;
00693 }
00694 }
00695 searchTime = bestSearchTime;
00696 cb_index = best_cb_index;
00697 checks = best_checks;
00698
00699 kmeans->set_cb_index(best_cb_index);
00700 Logger::info("Optimum cb_index: %g\n", cb_index);
00701 bestParams_["cb_index"] = cb_index;
00702 }
00703 else {
00704 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
00705 }
00706
00707 Logger::info("Required number of checks: %d \n", checks);
00708 searchParams.checks = checks;
00709
00710 speedup = linear / searchTime;
00711
00712 delete[] gt_matches.ptr();
00713 delete[] testDataset.ptr();
00714 }
00715
00716 return speedup;
00717 }
00718
00719
00720 void swap(AutotunedIndex& other)
00721 {
00722 BaseClass::swap(other);
00723 std::swap(bestIndex_, other.bestIndex_);
00724 std::swap(bestParams_, other.bestParams_);
00725 std::swap(bestSearchParams_, other.bestSearchParams_);
00726 std::swap(speedup_, other.speedup_);
00727 std::swap(dataset_, other.dataset_);
00728 std::swap(target_precision_, other.target_precision_);
00729 std::swap(build_weight_, other.build_weight_);
00730 std::swap(memory_weight_, other.memory_weight_);
00731 std::swap(sample_fraction_, other.sample_fraction_);
00732 }
00733
00734 private:
00735 NNIndex<Distance>* bestIndex_;
00736
00737 IndexParams bestParams_;
00738 SearchParams bestSearchParams_;
00739
00740 Matrix<ElementType> sampledDataset_;
00741 Matrix<ElementType> testDataset_;
00742 Matrix<size_t> gt_matches_;
00743
00744 float speedup_;
00745
00749 Matrix<ElementType> dataset_;
00750
00754 float target_precision_;
00755 float build_weight_;
00756 float memory_weight_;
00757 float sample_fraction_;
00758
00759 USING_BASECLASS_SYMBOLS
00760 };
00761 }
00762
00763 #endif