00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #ifndef _OPENCV_AUTOTUNEDINDEX_H_
00032 #define _OPENCV_AUTOTUNEDINDEX_H_
00033
00034 #include "opencv2/flann/general.h"
00035 #include "opencv2/flann/nn_index.h"
00036 #include "opencv2/flann/ground_truth.h"
00037 #include "opencv2/flann/index_testing.h"
00038 #include "opencv2/flann/sampling.h"
00039 #include "opencv2/flann/all_indices.h"
00040
00041 namespace cvflann
00042 {
00043
00044 struct AutotunedIndexParams : public IndexParams {
00045 AutotunedIndexParams( float target_precision_ = 0.8, float build_weight_ = 0.01,
00046 float memory_weight_ = 0, float sample_fraction_ = 0.1) :
00047 IndexParams(AUTOTUNED),
00048 target_precision(target_precision_),
00049 build_weight(build_weight_),
00050 memory_weight(memory_weight_),
00051 sample_fraction(sample_fraction_) {};
00052
00053 float target_precision;
00054 float build_weight;
00055 float memory_weight;
00056 float sample_fraction;
00057
00058 flann_algorithm_t getIndexType() const { return algorithm; }
00059
00060 void print() const
00061 {
00062 logger().info("Index type: %d\n",(int)algorithm);
00063 logger().info("logger(). precision: %g\n", target_precision);
00064 logger().info("Build weight: %g\n", build_weight);
00065 logger().info("Memory weight: %g\n", memory_weight);
00066 logger().info("Sample fraction: %g\n", sample_fraction);
00067 }
00068 };
00069
00070
/**
 * Nearest-neighbour index that selects the best index type (linear, kd-tree
 * or k-means tree) and its parameters automatically.  buildIndex() benchmarks
 * candidate configurations on a sample of the dataset against the target
 * precision from AutotunedIndexParams, builds the winning index on the full
 * dataset, and estimates the search parameters needed to reach that precision.
 */
template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type >
class AutotunedIndex : public NNIndex<ELEM_TYPE>
{
    NNIndex<ELEM_TYPE>* bestIndex;      // index chosen by autotuning (owned, freed in dtor)

    IndexParams* bestParams;            // parameters of the chosen index (owned, freed in dtor)
    SearchParams bestSearchParams;      // search params estimated for the target precision

    Matrix<ELEM_TYPE> sampledDataset;   // sample of the dataset used while tuning
    Matrix<ELEM_TYPE> testDataset;      // query points drawn from sampledDataset
    Matrix<int> gt_matches;             // ground-truth nearest neighbours for testDataset

    float speedup;                      // estimated speedup over linear search

    /** The full dataset the index is built on (storage not owned by this class). */
    const Matrix<ELEM_TYPE> dataset;

    /** Autotuning parameters (target precision, cost weights, sample fraction). */
    const AutotunedIndexParams& index_params;

public:

    /**
     * Constructs the index but performs no tuning; call buildIndex() to run
     * the autotuning and build the selected index.
     */
    AutotunedIndex(const Matrix<ELEM_TYPE>& inputData, const AutotunedIndexParams& params = AutotunedIndexParams() ) :
        dataset(inputData), index_params(params)
    {
        bestIndex = NULL;
        bestParams = NULL;
    }

    virtual ~AutotunedIndex()
    {
        if (bestIndex!=NULL) {
            delete bestIndex;
        }
        if (bestParams!=NULL) {
            delete bestParams;
        }
    };

    /**
     * Runs the autotuning (estimateBuildParams), builds the winning index on
     * the full dataset and estimates the search parameters / speedup.
     */
    virtual void buildIndex()
    {
        bestParams = estimateBuildParams();
        logger().info("----------------------------------------------------\n");
        logger().info("Autotuned parameters:\n");
        bestParams->print();
        logger().info("----------------------------------------------------\n");
        flann_algorithm_t index_type = bestParams->getIndexType();
        switch (index_type) {
        case LINEAR:
            bestIndex = new LinearIndex<ELEM_TYPE>(dataset, (const LinearIndexParams&)*bestParams);
            break;
        case KDTREE:
            bestIndex = new KDTreeIndex<ELEM_TYPE>(dataset, (const KDTreeIndexParams&)*bestParams);
            break;
        case KMEANS:
            bestIndex = new KMeansIndex<ELEM_TYPE>(dataset, (const KMeansIndexParams&)*bestParams);
            break;
        default:
            throw FLANNException("Unknown algorithm choosen by the autotuning, most likely a bug.");
        }
        bestIndex->buildIndex();
        speedup = estimateSearchParams(bestSearchParams);
    }

    /**
     * Saves the chosen index type, the underlying index and the tuned search
     * parameters to stream.
     * NOTE(review): bestParams is not serialized here; loadIndex() recreates
     * parameters from the factory instead — confirm this round-trips correctly.
     */
    virtual void saveIndex(FILE* stream)
    {
        save_value(stream, (int)bestIndex->getType());
        bestIndex->saveIndex(stream);
        save_value(stream, bestSearchParams);
    }

    /**
     * Loads an index previously written by saveIndex().
     * NOTE(review): 'params' is never deleted and a pre-existing bestIndex is
     * not freed before being replaced — suspected leaks; confirm the ownership
     * semantics of create_index_by_type before changing.
     */
    virtual void loadIndex(FILE* stream)
    {
        int index_type;
        load_value(stream,index_type);
        IndexParams* params = ParamsFactory_instance().create((flann_algorithm_t)index_type);
        bestIndex = create_index_by_type(dataset, *params);
        bestIndex->loadIndex(stream);
        load_value(stream, bestSearchParams);
    }

    /**
     * Searches for the nearest neighbours of vec.  A searchParams.checks value
     * of -2 requests the autotuned search parameters; any other value is
     * forwarded unchanged to the underlying index.
     */
    virtual void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams)
    {
        if (searchParams.checks==-2) {
            bestIndex->findNeighbors(result, vec, bestSearchParams);
        }
        else {
            bestIndex->findNeighbors(result, vec, searchParams);
        }
    }

    /** Parameters of the underlying (autotuned) index. */
    const IndexParams* getParameters() const
    {
        return bestIndex->getParameters();
    }

    /** Number of points in the underlying index. */
    virtual size_t size() const
    {
        return bestIndex->size();
    }

    /** Length (dimensionality) of each indexed vector. */
    virtual size_t veclen() const
    {
        return bestIndex->veclen();
    }

    /** Memory (in bytes) used by the underlying index. */
    virtual int usedMemory() const
    {
        return bestIndex->usedMemory();
    }

    /** Always AUTOTUNED, regardless of the index selected internally. */
    virtual flann_algorithm_t getType() const
    {
        return AUTOTUNED;
    }

private:

    /** Cost measurements for one candidate index configuration. */
    struct CostData {
        float searchTimeCost;  // time to reach the target precision on the test queries
        float buildTimeCost;   // time to build the candidate index
        float timeCost;        // buildTime*build_weight + searchTime
        float memoryCost;      // (index memory + dataset memory) / dataset memory
        float totalCost;       // timeCost/optTimeCost + memory_weight*memoryCost
    };

    typedef pair<CostData, KDTreeIndexParams> KDTreeCostData;
    typedef pair<CostData, KMeansIndexParams> KMeansCostData;

    /**
     * Builds a k-means tree on the sampled dataset with the given parameters
     * and fills in its build/search/memory costs (totalCost is set later by
     * optimizeKMeans once the optimum time cost is known).
     */
    void evaluate_kmeans(CostData& cost, const KMeansIndexParams& kmeans_params)
    {
        StartStopTimer t;
        int checks;
        const int nn = 1;  // tune for 1-NN searches

        logger().info("KMeansTree using params: max_iterations=%d, branching=%d\n", kmeans_params.iterations, kmeans_params.branching);
        KMeansIndex<ELEM_TYPE> kmeans(sampledDataset, kmeans_params);

        t.start();
        kmeans.buildIndex();
        t.stop();
        float buildTime = (float)t.value;

        // Search-time cost: time to reach target_precision; also yields 'checks'.
        float searchTime = test_index_precision(kmeans, sampledDataset, testDataset, gt_matches, index_params.target_precision, checks, nn);;

        float datasetMemory = (float)(sampledDataset.rows*sampledDataset.cols*sizeof(float));
        cost.memoryCost = (kmeans.usedMemory()+datasetMemory)/datasetMemory;
        cost.searchTimeCost = searchTime;
        cost.buildTimeCost = buildTime;
        cost.timeCost = (buildTime*index_params.build_weight+searchTime);
        logger().info("KMeansTree buildTime=%g, searchTime=%g, timeCost=%g, buildTimeFactor=%g\n",buildTime, searchTime, cost.timeCost, index_params.build_weight);
    }

    /**
     * Builds a kd-tree forest on the sampled dataset with the given parameters
     * and fills in its build/search/memory costs (totalCost is set later by
     * optimizeKDTree).
     */
    void evaluate_kdtree(CostData& cost, const KDTreeIndexParams& kdtree_params)
    {
        StartStopTimer t;
        int checks;
        const int nn = 1;  // tune for 1-NN searches

        logger().info("KDTree using params: trees=%d\n",kdtree_params.trees);
        KDTreeIndex<ELEM_TYPE> kdtree(sampledDataset, kdtree_params);

        t.start();
        kdtree.buildIndex();
        t.stop();
        float buildTime = (float)t.value;

        float searchTime = test_index_precision(kdtree, sampledDataset, testDataset, gt_matches, index_params.target_precision, checks, nn);

        float datasetMemory = (float)(sampledDataset.rows*sampledDataset.cols*sizeof(float));
        cost.memoryCost = (kdtree.usedMemory()+datasetMemory)/datasetMemory;
        cost.searchTimeCost = searchTime;
        cost.buildTimeCost = buildTime;
        cost.timeCost = (buildTime*index_params.build_weight+searchTime);
        logger().info("KDTree buildTime=%g, searchTime=%g, timeCost=%g\n",buildTime, searchTime, cost.timeCost);
    }

    /**
     * Evaluates a grid of k-means configurations (iterations x branching),
     * keeping the candidate list sorted by cost via insertion sort, and
     * returns the cheapest configuration (costs + params).
     */
    KMeansCostData optimizeKMeans()
    {
        logger().info("KMEANS, Step 1: Exploring parameter space\n");

        // Parameter grid explored for the k-means tree.
        int maxIterations[] = { 1, 5, 10, 15 };
        int branchingFactors[] = { 16, 32, 64, 128, 256 };

        int kmeansParamSpaceSize = ARRAY_LEN(maxIterations)*ARRAY_LEN(branchingFactors);

        vector<KMeansCostData> kmeansCosts(kmeansParamSpaceSize);

        // Evaluate every (iterations, branching) pair, inserting each result
        // into its sorted position by timeCost.
        int cnt = 0;
        for (size_t i=0; i<ARRAY_LEN(maxIterations); ++i) {
            for (size_t j=0; j<ARRAY_LEN(branchingFactors); ++j) {

                kmeansCosts[cnt].second.centers_init = CENTERS_RANDOM;
                kmeansCosts[cnt].second.iterations = maxIterations[i];
                kmeansCosts[cnt].second.branching = branchingFactors[j];

                evaluate_kmeans(kmeansCosts[cnt].first, kmeansCosts[cnt].second);

                int k = cnt;
                // Insertion step: bubble the new entry down to keep ascending timeCost.
                while (k>0 && kmeansCosts[k].first.timeCost < kmeansCosts[k-1].first.timeCost) {
                    swap(kmeansCosts[k],kmeansCosts[k-1]);
                    --k;
                }
                ++cnt;
            }
        }

        // Fold in the memory cost (relative to the best time cost) and re-sort
        // by totalCost, again via insertion sort.
        float optTimeCost = kmeansCosts[0].first.timeCost;

        for (int i=0;i<kmeansParamSpaceSize;++i) {
            kmeansCosts[i].first.totalCost = (kmeansCosts[i].first.timeCost/optTimeCost + index_params.memory_weight * kmeansCosts[i].first.memoryCost);

            int k = i;
            while (k>0 && kmeansCosts[k].first.totalCost < kmeansCosts[k-1].first.totalCost) {
                swap(kmeansCosts[k],kmeansCosts[k-1]);
                k--;
            }
        }

        for (int i=0;i<kmeansParamSpaceSize;++i) {
            logger().info("KMeans, branching=%d, iterations=%d, time_cost=%g[%g] (build=%g, search=%g), memory_cost=%g, cost=%g\n",
                kmeansCosts[i].second.branching, kmeansCosts[i].second.iterations,
                kmeansCosts[i].first.timeCost,kmeansCosts[i].first.timeCost/optTimeCost,
                kmeansCosts[i].first.buildTimeCost, kmeansCosts[i].first.searchTimeCost,
                kmeansCosts[i].first.memoryCost,kmeansCosts[i].first.totalCost);
        }

        // Front element is the cheapest configuration.
        return kmeansCosts[0];
    }

    /**
     * Evaluates several kd-tree forest sizes, keeping the candidate list
     * sorted by cost, and returns the cheapest configuration (costs + params).
     */
    KDTreeCostData optimizeKDTree()
    {
        logger().info("KD-TREE, Step 1: Exploring parameter space\n");

        // Forest sizes explored for the kd-tree.
        int testTrees[] = { 1, 4, 8, 16, 32 };

        size_t kdtreeParamSpaceSize = ARRAY_LEN(testTrees);
        vector<KDTreeCostData> kdtreeCosts(kdtreeParamSpaceSize);

        // Evaluate each tree count, inserting into sorted position by timeCost.
        int cnt = 0;
        for (size_t i=0; i<ARRAY_LEN(testTrees); ++i) {
            kdtreeCosts[cnt].second.trees = testTrees[i];

            evaluate_kdtree(kdtreeCosts[cnt].first, kdtreeCosts[cnt].second);

            int k = cnt;
            while (k>0 && kdtreeCosts[k].first.timeCost < kdtreeCosts[k-1].first.timeCost) {
                swap(kdtreeCosts[k],kdtreeCosts[k-1]);
                --k;
            }
            ++cnt;
        }

        // Fold in the memory cost and re-sort by totalCost.
        float optTimeCost = kdtreeCosts[0].first.timeCost;

        for (size_t i=0;i<kdtreeParamSpaceSize;++i) {
            kdtreeCosts[i].first.totalCost = (kdtreeCosts[i].first.timeCost/optTimeCost + index_params.memory_weight * kdtreeCosts[i].first.memoryCost);

            int k = (int)i;
            while (k>0 && kdtreeCosts[k].first.totalCost < kdtreeCosts[k-1].first.totalCost) {
                swap(kdtreeCosts[k],kdtreeCosts[k-1]);
                k--;
            }
        }

        for (size_t i=0;i<kdtreeParamSpaceSize;++i) {
            logger().info("kd-tree, trees=%d, time_cost=%g[%g] (build=%g, search=%g), memory_cost=%g, cost=%g\n",
                kdtreeCosts[i].second.trees,kdtreeCosts[i].first.timeCost,kdtreeCosts[i].first.timeCost/optTimeCost,
                kdtreeCosts[i].first.buildTimeCost, kdtreeCosts[i].first.searchTimeCost,
                kdtreeCosts[i].first.memoryCost,kdtreeCosts[i].first.totalCost);
        }

        return kdtreeCosts[0];
    }

    /**
     * Chooses the best index type for this dataset.  Samples the dataset,
     * computes ground truth on the sample, benchmarks k-means and kd-tree
     * candidates against a linear-scan baseline and returns newly-allocated
     * parameters for the cheapest option (caller takes ownership).
     * NOTE(review): when a later candidate wins, the previously allocated
     * bestParams (LinearIndexParams / KMeansIndexParams) is overwritten
     * without delete — suspected leak; confirm before fixing.
     */
    IndexParams* estimateBuildParams()
    {
        int sampleSize = int(index_params.sample_fraction*dataset.rows);
        int testSampleSize = min(sampleSize/10, 1000);

        logger().info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d\n",dataset.rows, sampleSize, testSampleSize);

        // Too few test points to benchmark meaningfully: fall back to linear.
        if (testSampleSize<10) {
            logger().info("Choosing linear, dataset too small\n");
            return new LinearIndexParams();
        }

        // Tuning sample, and test queries drawn (and removed) from it.
        sampledDataset = random_sample(dataset,sampleSize);

        testDataset = random_sample(sampledDataset,testSampleSize,true);

        // Exact 1-NN ground truth; its computation time is the linear-scan
        // baseline cost that candidates must beat.
        logger().info("Computing ground truth... \n");
        gt_matches = Matrix<int>(new int[testDataset.rows],(long)testDataset.rows, 1);
        StartStopTimer t;
        t.start();
        compute_ground_truth(sampledDataset, testDataset, gt_matches, 0);
        t.stop();
        float bestCost = (float)t.value;
        IndexParams* bestParams = new LinearIndexParams();

        logger().info("Autotuning parameters...\n");

        KMeansCostData kmeansCost = optimizeKMeans();
        if (kmeansCost.first.totalCost<bestCost) {
            bestParams = new KMeansIndexParams(kmeansCost.second);
            bestCost = kmeansCost.first.totalCost;
        }

        KDTreeCostData kdtreeCost = optimizeKDTree();

        if (kdtreeCost.first.totalCost<bestCost) {
            bestParams = new KDTreeIndexParams(kdtreeCost.second);
            bestCost = kdtreeCost.first.totalCost;
        }

        // Free tuning scratch data; the chosen index is built on the full dataset.
        gt_matches.release();
        sampledDataset.release();
        testDataset.release();

        return bestParams;
    }

    /**
     * Estimates the search parameters (number of checks, and for k-means the
     * cluster border index) needed to reach the target precision on the built
     * index, writing them into searchParams.
     * @return estimated speedup over linear search (0 if the dataset is too
     *         small to sample any test queries).
     */
    float estimateSearchParams(SearchParams& searchParams)
    {
        const int nn = 1;
        const size_t SAMPLE_COUNT = 1000;

        assert(bestIndex!=NULL);  // must be called after buildIndex()

        float speedup = 0;

        int samples = (int)min(dataset.rows/10, SAMPLE_COUNT);
        if (samples>0) {
            Matrix<ELEM_TYPE> testDataset = random_sample(dataset,samples);

            logger().info("Computing ground truth\n");

            // Exact ground truth on the full dataset; its timing is the
            // linear-search baseline used for the speedup estimate.
            Matrix<int> gt_matches(new int[testDataset.rows],(long)testDataset.rows,1);
            StartStopTimer t;
            t.start();
            compute_ground_truth(dataset, testDataset, gt_matches,1);
            t.stop();
            float linear = (float)t.value;

            int checks;
            logger().info("Estimating number of checks\n");

            float searchTime;
            float cb_index;
            if (bestIndex->getType() == KMEANS) {
                // For k-means, also sweep the cluster border index (cb_index)
                // over {0, 0.2, ..., 1.0} and keep the fastest setting.
                logger().info("KMeans algorithm, estimating cluster border factor\n");
                KMeansIndex<ELEM_TYPE>* kmeans = (KMeansIndex<ELEM_TYPE>*)bestIndex;
                float bestSearchTime = -1;
                float best_cb_index = -1;
                int best_checks = -1;
                for (cb_index = 0;cb_index<1.1f; cb_index+=0.2f) {
                    kmeans->set_cb_index(cb_index);
                    searchTime = test_index_precision(*kmeans, dataset, testDataset, gt_matches, index_params.target_precision, checks, nn, 1);
                    if (searchTime<bestSearchTime || bestSearchTime == -1) {
                        bestSearchTime = searchTime;
                        best_cb_index = cb_index;
                        best_checks = checks;
                    }
                }
                searchTime = bestSearchTime;
                cb_index = best_cb_index;
                checks = best_checks;

                kmeans->set_cb_index(best_cb_index);
                logger().info("Optimum cb_index: %g\n",cb_index);
                ((KMeansIndexParams*)bestParams)->cb_index = cb_index;
            }
            else {
                searchTime = test_index_precision(*bestIndex, dataset, testDataset, gt_matches, index_params.target_precision, checks, nn, 1);
            }

            logger().info("Required number of checks: %d \n",checks);;
            searchParams.checks = checks;

            speedup = linear/searchTime;

            gt_matches.release();
        }

        return speedup;
    }

};
00611
00612 }
00613
00614 #endif