00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #ifndef _OPENCV_KMEANSTREE_H_
00032 #define _OPENCV_KMEANSTREE_H_
00033
00034 #include <algorithm>
00035 #include <string>
00036 #include <map>
00037 #include <cassert>
00038 #include <limits>
00039 #include <cmath>
00040
00041 #include "opencv2/flann/general.h"
00042 #include "opencv2/flann/nn_index.h"
00043 #include "opencv2/flann/matrix.h"
00044 #include "opencv2/flann/result_set.h"
00045 #include "opencv2/flann/heap.h"
00046 #include "opencv2/flann/allocator.h"
00047 #include "opencv2/flann/random.h"
00048
00049 using namespace std;
00050
00051 namespace cvflann
00052 {
00053
00054
00055 struct CV_EXPORTS KMeansIndexParams : public IndexParams {
00056 KMeansIndexParams(int branching_ = 32, int iterations_ = 11,
00057 flann_centers_init_t centers_init_ = CENTERS_RANDOM, float cb_index_ = 0.2 ) :
00058 IndexParams(KMEANS),
00059 branching(branching_),
00060 iterations(iterations_),
00061 centers_init(centers_init_),
00062 cb_index(cb_index_) {};
00063
00064 int branching;
00065 int iterations;
00066 flann_centers_init_t centers_init;
00067 float cb_index;
00068
00069 flann_algorithm_t getIndexType() const { return KMEANS; }
00070
00071 void print() const
00072 {
00073 logger().info("Index type: %d\n",(int)algorithm);
00074 logger().info("Branching: %d\n", branching);
00075 logger().info("Iterations: %d\n", iterations);
00076 logger().info("Centres initialisation: %d\n", centers_init);
00077 logger().info("Cluster boundary weight: %g\n", cb_index);
00078 }
00079
00080 };
00081
00082
00089 template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type >
00090 class KMeansIndex : public NNIndex<ELEM_TYPE>
00091 {
00092
00096 int branching;
00097
00102 int max_iter;
00103
00110 float cb_index;
00111
00115 const Matrix<ELEM_TYPE> dataset;
00116
00117 const IndexParams& index_params;
00118
00122 size_t size_;
00123
00127 size_t veclen_;
00128
00129
00133 struct KMeansNodeSt {
00137 DIST_TYPE* pivot;
00141 DIST_TYPE radius;
00145 DIST_TYPE mean_radius;
00149 DIST_TYPE variance;
00153 int size;
00157 KMeansNodeSt** childs;
00161 int* indices;
00165 int level;
00166 };
00167 typedef KMeansNodeSt* KMeansNode;
00168
00169
00170
00174 typedef BranchStruct<KMeansNode> BranchSt;
00175
00176
00180 KMeansNode root;
00181
00185 int* indices;
00186
00187
00195 PooledAllocator pool;
00196
00200 int memoryCounter;
00201
00202
00203 typedef void (KMeansIndex::*centersAlgFunction)(int, int*, int, int*, int&);
00204
00208 centersAlgFunction chooseCenters;
00209
00210
00211
00222 void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length)
00223 {
00224 UniqueRandom r(indices_length);
00225
00226 int index;
00227 for (index=0;index<k;++index) {
00228 bool duplicate = true;
00229 int rnd;
00230 while (duplicate) {
00231 duplicate = false;
00232 rnd = r.next();
00233 if (rnd<0) {
00234 centers_length = index;
00235 return;
00236 }
00237
00238 centers[index] = indices[rnd];
00239
00240 for (int j=0;j<index;++j) {
00241 float sq = (float)flann_dist(dataset[centers[index]],dataset[centers[index]]+dataset.cols,dataset[centers[j]]);
00242 if (sq<1e-16) {
00243 duplicate = true;
00244 }
00245 }
00246 }
00247 }
00248
00249 centers_length = index;
00250 }
00251
00252
00263 void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length)
00264 {
00265 int n = indices_length;
00266
00267 int rnd = rand_int(n);
00268 assert(rnd >=0 && rnd < n);
00269
00270 centers[0] = indices[rnd];
00271
00272 int index;
00273 for (index=1; index<k; ++index) {
00274
00275 int best_index = -1;
00276 float best_val = 0;
00277 for (int j=0;j<n;++j) {
00278 float dist = (float)flann_dist(dataset[centers[0]],dataset[centers[0]]+dataset.cols,dataset[indices[j]]);
00279 for (int i=1;i<index;++i) {
00280 float tmp_dist = (float)flann_dist(dataset[centers[i]],dataset[centers[i]]+dataset.cols,dataset[indices[j]]);
00281 if (tmp_dist<dist) {
00282 dist = tmp_dist;
00283 }
00284 }
00285 if (dist>best_val) {
00286 best_val = dist;
00287 best_index = j;
00288 }
00289 }
00290 if (best_index!=-1) {
00291 centers[index] = indices[best_index];
00292 }
00293 else {
00294 break;
00295 }
00296 }
00297 centers_length = index;
00298 }
00299
00300
00314 void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length)
00315 {
00316 int n = indices_length;
00317
00318 double currentPot = 0;
00319 double* closestDistSq = new double[n];
00320
00321
00322 int index = rand_int(n);
00323 assert(index >=0 && index < n);
00324 centers[0] = indices[index];
00325
00326 for (int i = 0; i < n; i++) {
00327 closestDistSq[i] = flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]);
00328 currentPot += closestDistSq[i];
00329 }
00330
00331
00332 const int numLocalTries = 1;
00333
00334
00335 int centerCount;
00336 for (centerCount = 1; centerCount < k; centerCount++) {
00337
00338
00339 double bestNewPot = -1;
00340 int bestNewIndex = -1;
00341 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
00342
00343
00344
00345 double randVal = rand_double(currentPot);
00346 for (index = 0; index < n-1; index++) {
00347 if (randVal <= closestDistSq[index])
00348 break;
00349 else
00350 randVal -= closestDistSq[index];
00351 }
00352
00353
00354 double newPot = 0;
00355 for (int i = 0; i < n; i++)
00356 newPot += min( flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]), closestDistSq[i] );
00357
00358
00359 if (bestNewPot < 0 || newPot < bestNewPot) {
00360 bestNewPot = newPot;
00361 bestNewIndex = index;
00362 }
00363 }
00364
00365
00366 centers[centerCount] = indices[bestNewIndex];
00367 currentPot = bestNewPot;
00368 for (int i = 0; i < n; i++)
00369 closestDistSq[i] = min( flann_dist(dataset[indices[i]], dataset[indices[i]]+dataset.cols, dataset[indices[bestNewIndex]]), closestDistSq[i] );
00370 }
00371
00372 centers_length = centerCount;
00373
00374 delete[] closestDistSq;
00375 }
00376
00377
00378
00379 public:
00380
00381
00382 flann_algorithm_t getType() const
00383 {
00384 return KMEANS;
00385 }
00386
00394 KMeansIndex(const Matrix<ELEM_TYPE>& inputData, const KMeansIndexParams& params = KMeansIndexParams() )
00395 : dataset(inputData), index_params(params), root(NULL), indices(NULL)
00396 {
00397 memoryCounter = 0;
00398
00399 size_ = dataset.rows;
00400 veclen_ = dataset.cols;
00401
00402 branching = params.branching;
00403 max_iter = params.iterations;
00404 if (max_iter<0) {
00405 max_iter = numeric_limits<int>::max();
00406 }
00407 flann_centers_init_t centersInit = params.centers_init;
00408
00409 if (centersInit==CENTERS_RANDOM) {
00410 chooseCenters = &KMeansIndex::chooseCentersRandom;
00411 }
00412 else if (centersInit==CENTERS_GONZALES) {
00413 chooseCenters = &KMeansIndex::chooseCentersGonzales;
00414 }
00415 else if (centersInit==CENTERS_KMEANSPP) {
00416 chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
00417 }
00418 else {
00419 throw FLANNException("Unknown algorithm for choosing initial centers.");
00420 }
00421 cb_index = 0.4f;
00422
00423 }
00424
00425
00431 virtual ~KMeansIndex()
00432 {
00433 if (root != NULL) {
00434 free_centers(root);
00435 }
00436 if (indices!=NULL) {
00437 delete[] indices;
00438 }
00439 }
00440
00444 size_t size() const
00445 {
00446 return size_;
00447 }
00448
00452 size_t veclen() const
00453 {
00454 return veclen_;
00455 }
00456
00457
00458 void set_cb_index( float index)
00459 {
00460 cb_index = index;
00461 }
00462
00463
00468 int usedMemory() const
00469 {
00470 return pool.usedMemory+pool.wastedMemory+memoryCounter;
00471 }
00472
00476 void buildIndex()
00477 {
00478 if (branching<2) {
00479 throw FLANNException("Branching factor must be at least 2");
00480 }
00481
00482 indices = new int[size_];
00483 for (size_t i=0;i<size_;++i) {
00484 indices[i] = (int)i;
00485 }
00486
00487 root = pool.allocate<KMeansNodeSt>();
00488 computeNodeStatistics(root, indices, (int)size_);
00489 computeClustering(root, indices, (int)size_, branching,0);
00490 }
00491
00492
00493 void saveIndex(FILE* stream)
00494 {
00495 save_value(stream, branching);
00496 save_value(stream, max_iter);
00497 save_value(stream, memoryCounter);
00498 save_value(stream, cb_index);
00499 save_value(stream, *indices, (int)size_);
00500
00501 save_tree(stream, root);
00502 }
00503
00504
00505 void loadIndex(FILE* stream)
00506 {
00507 load_value(stream, branching);
00508 load_value(stream, max_iter);
00509 load_value(stream, memoryCounter);
00510 load_value(stream, cb_index);
00511 if (indices!=NULL) {
00512 delete[] indices;
00513 }
00514 indices = new int[size_];
00515 load_value(stream, *indices, (int)size_);
00516
00517 if (root!=NULL) {
00518 free_centers(root);
00519 }
00520 load_tree(stream, root);
00521 }
00522
00523
00533 void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams)
00534 {
00535
00536 int maxChecks = searchParams.checks;
00537
00538 if (maxChecks<0) {
00539 findExactNN(root, result, vec);
00540 }
00541 else {
00542
00543 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
00544
00545 int checks = 0;
00546
00547 findNN(root, result, vec, checks, maxChecks, heap);
00548
00549 BranchSt branch;
00550 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
00551 KMeansNode node = branch.node;
00552 findNN(node, result, vec, checks, maxChecks, heap);
00553 }
00554 assert(result.full());
00555
00556 delete heap;
00557 }
00558
00559 }
00560
00561
00569 int getClusterCenters(Matrix<DIST_TYPE>& centers)
00570 {
00571 int numClusters = centers.rows;
00572 if (numClusters<1) {
00573 throw FLANNException("Number of clusters must be at least 1");
00574 }
00575
00576 float variance;
00577 KMeansNode* clusters = new KMeansNode[numClusters];
00578
00579 int clusterCount = getMinVarianceClusters(root, clusters, numClusters, variance);
00580
00581
00582
00583
00584 for (int i=0;i<clusterCount;++i) {
00585 DIST_TYPE* center = clusters[i]->pivot;
00586 for (size_t j=0;j<veclen_;++j) {
00587 centers[i][j] = center[j];
00588 }
00589 }
00590 delete[] clusters;
00591
00592 return clusterCount;
00593 }
00594
00595 const IndexParams* getParameters() const
00596 {
00597 return &index_params;
00598 }
00599
00600
00601 private:
00602
00603
00604 void save_tree(FILE* stream, KMeansNode node)
00605 {
00606 save_value(stream, *node);
00607 save_value(stream, *(node->pivot), (int)veclen_);
00608 if (node->childs==NULL) {
00609 int indices_offset = (int)(node->indices - indices);
00610 save_value(stream, indices_offset);
00611 }
00612 else {
00613 for(int i=0; i<branching; ++i) {
00614 save_tree(stream, node->childs[i]);
00615 }
00616 }
00617 }
00618
00619
00620 void load_tree(FILE* stream, KMeansNode& node)
00621 {
00622 node = pool.allocate<KMeansNodeSt>();
00623 load_value(stream, *node);
00624 node->pivot = new DIST_TYPE[veclen_];
00625 load_value(stream, *(node->pivot), (int)veclen_);
00626 if (node->childs==NULL) {
00627 int indices_offset;
00628 load_value(stream, indices_offset);
00629 node->indices = indices + indices_offset;
00630 }
00631 else {
00632 node->childs = pool.allocate<KMeansNode>(branching);
00633 for(int i=0; i<branching; ++i) {
00634 load_tree(stream, node->childs[i]);
00635 }
00636 }
00637 }
00638
00639
00643 void free_centers(KMeansNode node)
00644 {
00645 delete[] node->pivot;
00646 if (node->childs!=NULL) {
00647 for (int k=0;k<branching;++k) {
00648 free_centers(node->childs[k]);
00649 }
00650 }
00651 }
00652
00660 void computeNodeStatistics(KMeansNode node, int* indices, int indices_length) {
00661
00662 double radius = 0;
00663 double variance = 0;
00664 DIST_TYPE* mean = new DIST_TYPE[veclen_];
00665 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00666
00667 memset(mean,0,veclen_*sizeof(float));
00668
00669 for (size_t i=0;i<size_;++i) {
00670 ELEM_TYPE* vec = dataset[indices[i]];
00671 for (size_t j=0;j<veclen_;++j) {
00672 mean[j] += vec[j];
00673 }
00674 variance += flann_dist(vec,vec+veclen_,zero());
00675 }
00676 for (size_t j=0;j<veclen_;++j) {
00677 mean[j] /= size_;
00678 }
00679 variance /= size_;
00680 variance -= flann_dist(mean,mean+veclen_,zero());
00681
00682 double tmp = 0;
00683 for (int i=0;i<indices_length;++i) {
00684 tmp = flann_dist(mean, mean + veclen_, dataset[indices[i]]);
00685 if (tmp>radius) {
00686 radius = tmp;
00687 }
00688 }
00689
00690 node->variance = (DIST_TYPE)variance;
00691 node->radius = (DIST_TYPE)radius;
00692 node->pivot = mean;
00693 }
00694
00695
00707 void computeClustering(KMeansNode node, int* indices, int indices_length, int branching, int level)
00708 {
00709 node->size = indices_length;
00710 node->level = level;
00711
00712 if (indices_length < branching) {
00713 node->indices = indices;
00714 sort(node->indices,node->indices+indices_length);
00715 node->childs = NULL;
00716 return;
00717 }
00718
00719 int* centers_idx = new int[branching];
00720 int centers_length;
00721 (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
00722
00723 if (centers_length<branching) {
00724 node->indices = indices;
00725 sort(node->indices,node->indices+indices_length);
00726 node->childs = NULL;
00727 return;
00728 }
00729
00730
00731 Matrix<double> dcenters(new double[branching*veclen_],branching,(long)veclen_);
00732 for (int i=0; i<centers_length; ++i) {
00733 ELEM_TYPE* vec = dataset[centers_idx[i]];
00734 for (size_t k=0; k<veclen_; ++k) {
00735 dcenters[i][k] = double(vec[k]);
00736 }
00737 }
00738 delete[] centers_idx;
00739
00740 float* radiuses = new float[branching];
00741 int* count = new int[branching];
00742 for (int i=0;i<branching;++i) {
00743 radiuses[i] = 0;
00744 count[i] = 0;
00745 }
00746
00747
00748 int* belongs_to = new int[indices_length];
00749 for (int i=0;i<indices_length;++i) {
00750
00751 double sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]] + veclen_ ,dcenters[0]);
00752 belongs_to[i] = 0;
00753 for (int j=1;j<branching;++j) {
00754 double new_sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_, dcenters[j]);
00755 if (sq_dist>new_sq_dist) {
00756 belongs_to[i] = j;
00757 sq_dist = new_sq_dist;
00758 }
00759 }
00760 if (sq_dist>radiuses[belongs_to[i]]) {
00761 radiuses[belongs_to[i]] = (float)sq_dist;
00762 }
00763 count[belongs_to[i]]++;
00764 }
00765
00766 bool converged = false;
00767 int iteration = 0;
00768 while (!converged && iteration<max_iter) {
00769 converged = true;
00770 iteration++;
00771
00772
00773 for (int i=0;i<branching;++i) {
00774 memset(dcenters[i],0,sizeof(double)*veclen_);
00775 radiuses[i] = 0;
00776 }
00777 for (int i=0;i<indices_length;++i) {
00778 ELEM_TYPE* vec = dataset[indices[i]];
00779 double* center = dcenters[belongs_to[i]];
00780 for (size_t k=0;k<veclen_;++k) {
00781 center[k] += vec[k];
00782 }
00783 }
00784 for (int i=0;i<branching;++i) {
00785 int cnt = count[i];
00786 for (size_t k=0;k<veclen_;++k) {
00787 dcenters[i][k] /= cnt;
00788 }
00789 }
00790
00791
00792 for (int i=0;i<indices_length;++i) {
00793 float sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_ ,dcenters[0]);
00794 int new_centroid = 0;
00795 for (int j=1;j<branching;++j) {
00796 float new_sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_,dcenters[j]);
00797 if (sq_dist>new_sq_dist) {
00798 new_centroid = j;
00799 sq_dist = new_sq_dist;
00800 }
00801 }
00802 if (sq_dist>radiuses[new_centroid]) {
00803 radiuses[new_centroid] = sq_dist;
00804 }
00805 if (new_centroid != belongs_to[i]) {
00806 count[belongs_to[i]]--;
00807 count[new_centroid]++;
00808 belongs_to[i] = new_centroid;
00809
00810 converged = false;
00811 }
00812 }
00813
00814 for (int i=0;i<branching;++i) {
00815
00816
00817 if (count[i]==0) {
00818 int j = (i+1)%branching;
00819 while (count[j]<=1) {
00820 j = (j+1)%branching;
00821 }
00822
00823 for (int k=0;k<indices_length;++k) {
00824 if (belongs_to[k]==j) {
00825 belongs_to[k] = i;
00826 count[j]--;
00827 count[i]++;
00828 break;
00829 }
00830 }
00831 converged = false;
00832 }
00833 }
00834
00835 }
00836
00837 DIST_TYPE** centers = new DIST_TYPE*[branching];
00838
00839 for (int i=0; i<branching; ++i) {
00840 centers[i] = new DIST_TYPE[veclen_];
00841 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00842 for (size_t k=0; k<veclen_; ++k) {
00843 centers[i][k] = (DIST_TYPE)dcenters[i][k];
00844 }
00845 }
00846
00847
00848
00849 node->childs = pool.allocate<KMeansNode>(branching);
00850 int start = 0;
00851 int end = start;
00852 for (int c=0;c<branching;++c) {
00853 int s = count[c];
00854
00855 double variance = 0;
00856 double mean_radius =0;
00857 for (int i=0;i<indices_length;++i) {
00858 if (belongs_to[i]==c) {
00859 double d = flann_dist(dataset[indices[i]],dataset[indices[i]]+veclen_,zero());
00860 variance += d;
00861 mean_radius += sqrt(d);
00862 swap(indices[i],indices[end]);
00863 swap(belongs_to[i],belongs_to[end]);
00864 end++;
00865 }
00866 }
00867 variance /= s;
00868 mean_radius /= s;
00869 variance -= flann_dist(centers[c],centers[c]+veclen_,zero());
00870
00871 node->childs[c] = pool.allocate<KMeansNodeSt>();
00872 node->childs[c]->radius = radiuses[c];
00873 node->childs[c]->pivot = centers[c];
00874 node->childs[c]->variance = (float)variance;
00875 node->childs[c]->mean_radius = (float)mean_radius;
00876 node->childs[c]->indices = NULL;
00877 computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
00878 start=end;
00879 }
00880
00881 delete[] dcenters.data;
00882 delete[] centers;
00883 delete[] radiuses;
00884 delete[] count;
00885 delete[] belongs_to;
00886 }
00887
00888
00889
00903 void findNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, int& checks, int maxChecks,
00904 Heap<BranchSt>* heap)
00905 {
00906
00907 {
00908 DIST_TYPE bsq = (DIST_TYPE)flann_dist(vec, vec+veclen_, node->pivot);
00909 DIST_TYPE rsq = node->radius;
00910 DIST_TYPE wsq = result.worstDist();
00911
00912 DIST_TYPE val = bsq-rsq-wsq;
00913 DIST_TYPE val2 = val*val-4*rsq*wsq;
00914
00915
00916 if (val>0 && val2>0) {
00917 return;
00918 }
00919 }
00920
00921 if (node->childs==NULL) {
00922 if (checks>=maxChecks) {
00923 if (result.full()) return;
00924 }
00925 checks += node->size;
00926 for (int i=0;i<node->size;++i) {
00927 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00928 }
00929 }
00930 else {
00931 float* domain_distances = new float[branching];
00932 int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
00933 delete[] domain_distances;
00934 findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
00935 }
00936 }
00937
00946 int exploreNodeBranches(KMeansNode node, const ELEM_TYPE* q, float* domain_distances, Heap<BranchSt>* heap)
00947 {
00948
00949 int best_index = 0;
00950 domain_distances[best_index] = (float)flann_dist(q,q+veclen_,node->childs[best_index]->pivot);
00951 for (int i=1;i<branching;++i) {
00952 domain_distances[i] = (float)flann_dist(q,q+veclen_,node->childs[i]->pivot);
00953 if (domain_distances[i]<domain_distances[best_index]) {
00954 best_index = i;
00955 }
00956 }
00957
00958
00959 for (int i=0;i<branching;++i) {
00960 if (i != best_index) {
00961 domain_distances[i] -= cb_index*node->childs[i]->variance;
00962
00963
00964
00965
00966
00967 heap->insert(BranchSt::make_branch(node->childs[i],domain_distances[i]));
00968 }
00969 }
00970
00971 return best_index;
00972 }
00973
00974
00978 void findExactNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec)
00979 {
00980
00981 {
00982 float bsq = (float)flann_dist(vec, vec+veclen_, node->pivot);
00983 float rsq = node->radius;
00984 float wsq = result.worstDist();
00985
00986 float val = bsq-rsq-wsq;
00987 float val2 = val*val-4*rsq*wsq;
00988
00989
00990 if (val>0 && val2>0) {
00991 return;
00992 }
00993 }
00994
00995
00996 if (node->childs==NULL) {
00997 for (int i=0;i<node->size;++i) {
00998 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00999 }
01000 }
01001 else {
01002 int* sort_indices = new int[branching];
01003
01004 getCenterOrdering(node, vec, sort_indices);
01005
01006 for (int i=0; i<branching; ++i) {
01007 findExactNN(node->childs[sort_indices[i]],result,vec);
01008 }
01009
01010 delete[] sort_indices;
01011 }
01012 }
01013
01014
01020 void getCenterOrdering(KMeansNode node, const ELEM_TYPE* q, int* sort_indices)
01021 {
01022 float* domain_distances = new float[branching];
01023 for (int i=0;i<branching;++i) {
01024 float dist = (float)flann_dist(q, q+veclen_, node->childs[i]->pivot);
01025
01026 int j=0;
01027 while (domain_distances[j]<dist && j<i) j++;
01028 for (int k=i;k>j;--k) {
01029 domain_distances[k] = domain_distances[k-1];
01030 sort_indices[k] = sort_indices[k-1];
01031 }
01032 domain_distances[j] = dist;
01033 sort_indices[j] = i;
01034 }
01035 delete[] domain_distances;
01036 }
01037
01043 float getDistanceToBorder(float* p, float* c, float* q)
01044 {
01045 float sum = 0;
01046 float sum2 = 0;
01047
01048 for (int i=0;i<veclen_; ++i) {
01049 float t = c[i]-p[i];
01050 sum += t*(q[i]-(c[i]+p[i])/2);
01051 sum2 += t*t;
01052 }
01053
01054 return sum*sum/sum2;
01055 }
01056
01057
01067 int getMinVarianceClusters(KMeansNode root, KMeansNode* clusters, int clusters_length, float& varianceValue)
01068 {
01069 int clusterCount = 1;
01070 clusters[0] = root;
01071
01072 float meanVariance = root->variance*root->size;
01073
01074 while (clusterCount<clusters_length) {
01075 float minVariance = numeric_limits<float>::max();
01076 int splitIndex = -1;
01077
01078 for (int i=0;i<clusterCount;++i) {
01079 if (clusters[i]->childs != NULL) {
01080
01081 float variance = meanVariance - clusters[i]->variance*clusters[i]->size;
01082
01083 for (int j=0;j<branching;++j) {
01084 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
01085 }
01086 if (variance<minVariance) {
01087 minVariance = variance;
01088 splitIndex = i;
01089 }
01090 }
01091 }
01092
01093 if (splitIndex==-1) break;
01094 if ( (branching+clusterCount-1) > clusters_length) break;
01095
01096 meanVariance = minVariance;
01097
01098
01099 KMeansNode toSplit = clusters[splitIndex];
01100 clusters[splitIndex] = toSplit->childs[0];
01101 for (int i=1;i<branching;++i) {
01102 clusters[clusterCount++] = toSplit->childs[i];
01103 }
01104 }
01105
01106 varianceValue = meanVariance/root->size;
01107 return clusterCount;
01108 }
01109 };
01110
01111
01112
01113
01114
01115 }
01116
01117 #endif //_OPENCV_KMEANSTREE_H_