opencv2: kmeans_index.h Source File

00001 /***********************************************************************
00002  * Software License Agreement (BSD License)
00003  *
00004  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
00005  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
00006  *
00007  * THE BSD LICENSE
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  *
00013  * 1. Redistributions of source code must retain the above copyright
00014  *    notice, this list of conditions and the following disclaimer.
00015  * 2. Redistributions in binary form must reproduce the above copyright
00016  *    notice, this list of conditions and the following disclaimer in the
00017  *    documentation and/or other materials provided with the distribution.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00020  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00021  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00022  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00023  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00024  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00028  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *************************************************************************/
00030 
00031 #ifndef _OPENCV_KMEANSTREE_H_
00032 #define _OPENCV_KMEANSTREE_H_
00033 
00034 #include <algorithm>
00035 #include <string>
00036 #include <map>
00037 #include <cassert>
00038 #include <limits>
00039 #include <cmath>
00040 
00041 #include "opencv2/flann/general.h"
00042 #include "opencv2/flann/nn_index.h"
00043 #include "opencv2/flann/matrix.h"
00044 #include "opencv2/flann/result_set.h"
00045 #include "opencv2/flann/heap.h"
00046 #include "opencv2/flann/allocator.h"
00047 #include "opencv2/flann/random.h"
00048 
00049 using namespace std;
00050 
00051 namespace cvflann
00052 {
00053 
00054 
00055 struct CV_EXPORTS KMeansIndexParams : public IndexParams {
00056         KMeansIndexParams(int branching_ = 32, int iterations_ = 11,
00057                         flann_centers_init_t centers_init_ = CENTERS_RANDOM, float cb_index_ = 0.2 ) :
00058                 IndexParams(KMEANS),
00059                 branching(branching_),
00060                 iterations(iterations_),
00061                 centers_init(centers_init_),
00062                 cb_index(cb_index_) {};
00063 
00064         int branching;             // branching factor (for kmeans tree)
00065         int iterations;            // max iterations to perform in one kmeans clustering (kmeans tree)
00066         flann_centers_init_t centers_init;          // algorithm used for picking the initial cluster centers for kmeans tree
00067     float cb_index;            // cluster boundary index. Used when searching the kmeans tree
00068 
00069         flann_algorithm_t getIndexType() const { return KMEANS; }
00070 
00071         void print() const
00072         {
00073                 logger().info("Index type: %d\n",(int)algorithm);
00074                 logger().info("Branching: %d\n", branching);
00075                 logger().info("Iterations: %d\n", iterations);
00076                 logger().info("Centres initialisation: %d\n", centers_init);
00077                 logger().info("Cluster boundary weight: %g\n", cb_index);
00078         }
00079 
00080 };
00081 
00082 
00089 template <typename ELEM_TYPE, typename DIST_TYPE = typename DistType<ELEM_TYPE>::type >
00090 class KMeansIndex : public NNIndex<ELEM_TYPE>
00091 {
00092 
00096         int branching;
00097 
00102         int max_iter;
00103 
00110     float cb_index;
00111 
00115     const Matrix<ELEM_TYPE> dataset;
00116 
00117     const IndexParams& index_params;
00118 
00122     size_t size_;
00123 
00127     size_t veclen_;
00128 
00129 
00133         struct KMeansNodeSt     {
00137                 DIST_TYPE* pivot;
00141                 DIST_TYPE radius;
00145                 DIST_TYPE mean_radius;
00149                 DIST_TYPE variance;
00153                 int size;
00157                 KMeansNodeSt** childs;
00161                 int* indices;
00165                 int level;
00166         };
00167     typedef KMeansNodeSt* KMeansNode;
00168 
00169 
00170 
00174     typedef BranchStruct<KMeansNode> BranchSt;
00175 
00176 
00180         KMeansNode root;
00181 
00185         int* indices;
00186 
00187 
00195         PooledAllocator pool;
00196 
00200         int memoryCounter;
00201 
00202 
00203         typedef void (KMeansIndex::*centersAlgFunction)(int, int*, int, int*, int&);
00204 
00208     centersAlgFunction chooseCenters;
00209 
00210 
00211 
00222     void chooseCentersRandom(int k, int* indices, int indices_length, int* centers, int& centers_length)
00223     {
00224         UniqueRandom r(indices_length);
00225 
00226         int index;
00227         for (index=0;index<k;++index) {
00228             bool duplicate = true;
00229             int rnd;
00230             while (duplicate) {
00231                 duplicate = false;
00232                 rnd = r.next();
00233                 if (rnd<0) {
00234                     centers_length = index;
00235                     return;
00236                 }
00237 
00238                 centers[index] = indices[rnd];
00239 
00240                 for (int j=0;j<index;++j) {
00241                     float sq = (float)flann_dist(dataset[centers[index]],dataset[centers[index]]+dataset.cols,dataset[centers[j]]);
00242                     if (sq<1e-16) {
00243                         duplicate = true;
00244                     }
00245                 }
00246             }
00247         }
00248 
00249         centers_length = index;
00250     }
00251 
00252 
00263     void chooseCentersGonzales(int k, int* indices, int indices_length, int* centers, int& centers_length)
00264     {
00265         int n = indices_length;
00266 
00267         int rnd = rand_int(n);
00268         assert(rnd >=0 && rnd < n);
00269 
00270         centers[0] = indices[rnd];
00271 
00272         int index;
00273         for (index=1; index<k; ++index) {
00274 
00275             int best_index = -1;
00276             float best_val = 0;
00277             for (int j=0;j<n;++j) {
00278                 float dist = (float)flann_dist(dataset[centers[0]],dataset[centers[0]]+dataset.cols,dataset[indices[j]]);
00279                 for (int i=1;i<index;++i) {
00280                         float tmp_dist = (float)flann_dist(dataset[centers[i]],dataset[centers[i]]+dataset.cols,dataset[indices[j]]);
00281                     if (tmp_dist<dist) {
00282                         dist = tmp_dist;
00283                     }
00284                 }
00285                 if (dist>best_val) {
00286                     best_val = dist;
00287                     best_index = j;
00288                 }
00289             }
00290             if (best_index!=-1) {
00291                 centers[index] = indices[best_index];
00292             }
00293             else {
00294                 break;
00295             }
00296         }
00297         centers_length = index;
00298     }
00299 
00300 
00314     void chooseCentersKMeanspp(int k, int* indices, int indices_length, int* centers, int& centers_length)
00315     {
00316         int n = indices_length;
00317 
00318         double currentPot = 0;
00319         double* closestDistSq = new double[n];
00320 
00321         // Choose one random center and set the closestDistSq values
00322         int index = rand_int(n);
00323         assert(index >=0 && index < n);
00324         centers[0] = indices[index];
00325 
00326         for (int i = 0; i < n; i++) {
00327             closestDistSq[i] = flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]);
00328             currentPot += closestDistSq[i];
00329         }
00330 
00331 
00332         const int numLocalTries = 1;
00333 
00334         // Choose each center
00335         int centerCount;
00336         for (centerCount = 1; centerCount < k; centerCount++) {
00337 
00338             // Repeat several trials
00339             double bestNewPot = -1;
00340             int bestNewIndex = -1;
00341             for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
00342 
00343                 // Choose our center - have to be slightly careful to return a valid answer even accounting
00344                 // for possible rounding errors
00345             double randVal = rand_double(currentPot);
00346                 for (index = 0; index < n-1; index++) {
00347                     if (randVal <= closestDistSq[index])
00348                         break;
00349                     else
00350                         randVal -= closestDistSq[index];
00351                 }
00352 
00353                 // Compute the new potential
00354                 double newPot = 0;
00355                 for (int i = 0; i < n; i++)
00356                     newPot += min( flann_dist(dataset[indices[i]], dataset[indices[i]] + dataset.cols, dataset[indices[index]]), closestDistSq[i] );
00357 
00358                 // Store the best result
00359                 if (bestNewPot < 0 || newPot < bestNewPot) {
00360                     bestNewPot = newPot;
00361                     bestNewIndex = index;
00362                 }
00363             }
00364 
00365             // Add the appropriate center
00366             centers[centerCount] = indices[bestNewIndex];
00367             currentPot = bestNewPot;
00368             for (int i = 0; i < n; i++)
00369                 closestDistSq[i] = min( flann_dist(dataset[indices[i]], dataset[indices[i]]+dataset.cols, dataset[indices[bestNewIndex]]), closestDistSq[i] );
00370         }
00371 
00372         centers_length = centerCount;
00373 
00374         delete[] closestDistSq;
00375     }
00376 
00377 
00378 
00379 public:
00380 
00381 
00382     flann_algorithm_t getType() const
00383     {
00384         return KMEANS;
00385     }
00386 
00394         KMeansIndex(const Matrix<ELEM_TYPE>& inputData, const KMeansIndexParams& params = KMeansIndexParams() )
00395                 : dataset(inputData), index_params(params), root(NULL), indices(NULL)
00396         {
00397                 memoryCounter = 0;
00398 
00399         size_ = dataset.rows;
00400         veclen_ = dataset.cols;
00401 
00402         branching = params.branching;
00403         max_iter = params.iterations;
00404         if (max_iter<0) {
00405                 max_iter = numeric_limits<int>::max();
00406         }
00407         flann_centers_init_t centersInit = params.centers_init;
00408 
00409         if (centersInit==CENTERS_RANDOM) {
00410                 chooseCenters = &KMeansIndex::chooseCentersRandom;
00411         }
00412         else if (centersInit==CENTERS_GONZALES) {
00413                 chooseCenters = &KMeansIndex::chooseCentersGonzales;
00414         }
00415         else if (centersInit==CENTERS_KMEANSPP) {
00416                         chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
00417         }
00418                 else {
00419                         throw FLANNException("Unknown algorithm for choosing initial centers.");
00420                 }
00421         cb_index = 0.4f;
00422 
00423         }
00424 
00425 
00431         virtual ~KMeansIndex()
00432         {
00433                 if (root != NULL) {
00434                         free_centers(root);
00435                 }
00436         if (indices!=NULL) {
00437                   delete[] indices;
00438         }
00439         }
00440 
00444         size_t size() const
00445     {
00446         return size_;
00447     }
00448 
00452     size_t veclen() const
00453     {
00454         return veclen_;
00455     }
00456 
00457 
00458     void set_cb_index( float index)
00459     {
00460         cb_index = index;
00461     }
00462 
00463 
00468         int usedMemory() const
00469         {
00470                 return  pool.usedMemory+pool.wastedMemory+memoryCounter;
00471         }
00472 
00476         void buildIndex()
00477         {
00478                 if (branching<2) {
00479                         throw FLANNException("Branching factor must be at least 2");
00480                 }
00481 
00482                 indices = new int[size_];
00483                 for (size_t i=0;i<size_;++i) {
00484                         indices[i] = (int)i;
00485                 }
00486 
00487                 root = pool.allocate<KMeansNodeSt>();
00488                 computeNodeStatistics(root, indices, (int)size_);
00489                 computeClustering(root, indices, (int)size_, branching,0);
00490         }
00491 
00492 
00493     void saveIndex(FILE* stream)
00494     {
00495         save_value(stream, branching);
00496         save_value(stream, max_iter);
00497         save_value(stream, memoryCounter);
00498         save_value(stream, cb_index);
00499         save_value(stream, *indices, (int)size_);
00500 
00501                 save_tree(stream, root);
00502     }
00503 
00504 
00505     void loadIndex(FILE* stream)
00506     {
00507         load_value(stream, branching);
00508         load_value(stream, max_iter);
00509         load_value(stream, memoryCounter);
00510         load_value(stream, cb_index);
00511         if (indices!=NULL) {
00512                 delete[] indices;
00513         }
00514                 indices = new int[size_];
00515         load_value(stream, *indices, (int)size_);
00516 
00517         if (root!=NULL) {
00518                 free_centers(root);
00519         }
00520                 load_tree(stream, root);
00521     }
00522 
00523 
00533     void findNeighbors(ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, const SearchParams& searchParams)
00534     {
00535 
00536         int maxChecks = searchParams.checks;
00537 
00538         if (maxChecks<0) {
00539             findExactNN(root, result, vec);
00540         }
00541         else {
00542              // Priority queue storing intermediate branches in the best-bin-first search
00543             Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
00544 
00545             int checks = 0;
00546 
00547             findNN(root, result, vec, checks, maxChecks, heap);
00548 
00549             BranchSt branch;
00550             while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
00551                 KMeansNode node = branch.node;
00552                 findNN(node, result, vec, checks, maxChecks, heap);
00553             }
00554             assert(result.full());
00555 
00556             delete heap;
00557         }
00558 
00559     }
00560 
00561 
00569     int getClusterCenters(Matrix<DIST_TYPE>& centers)
00570     {
00571         int numClusters = centers.rows;
00572         if (numClusters<1) {
00573             throw FLANNException("Number of clusters must be at least 1");
00574         }
00575 
00576         float variance;
00577         KMeansNode* clusters = new KMeansNode[numClusters];
00578 
00579         int clusterCount = getMinVarianceClusters(root, clusters, numClusters, variance);
00580 
00581 //         logger().info("Clusters requested: %d, returning %d\n",numClusters, clusterCount);
00582 
00583 
00584         for (int i=0;i<clusterCount;++i) {
00585             DIST_TYPE* center = clusters[i]->pivot;
00586             for (size_t j=0;j<veclen_;++j) {
00587                 centers[i][j] = center[j];
00588             }
00589         }
00590                 delete[] clusters;
00591 
00592         return clusterCount;
00593     }
00594 
00595         const IndexParams* getParameters() const
00596         {
00597                 return &index_params;
00598         }
00599 
00600 
00601 private:
00602 
00603 
00604     void save_tree(FILE* stream, KMeansNode node)
00605     {
00606         save_value(stream, *node);
00607         save_value(stream, *(node->pivot), (int)veclen_);
00608         if (node->childs==NULL) {
00609                 int indices_offset = (int)(node->indices - indices);
00610                 save_value(stream, indices_offset);
00611         }
00612         else {
00613                 for(int i=0; i<branching; ++i) {
00614                         save_tree(stream, node->childs[i]);
00615                 }
00616         }
00617     }
00618 
00619 
00620     void load_tree(FILE* stream, KMeansNode& node)
00621     {
00622         node = pool.allocate<KMeansNodeSt>();
00623         load_value(stream, *node);
00624         node->pivot = new DIST_TYPE[veclen_];
00625         load_value(stream, *(node->pivot), (int)veclen_);
00626         if (node->childs==NULL) {
00627                 int indices_offset;
00628                 load_value(stream, indices_offset);
00629                 node->indices = indices + indices_offset;
00630         }
00631         else {
00632                 node->childs = pool.allocate<KMeansNode>(branching);
00633                 for(int i=0; i<branching; ++i) {
00634                         load_tree(stream, node->childs[i]);
00635                 }
00636         }
00637     }
00638 
00639 
00643     void free_centers(KMeansNode node)
00644     {
00645         delete[] node->pivot;
00646         if (node->childs!=NULL) {
00647             for (int k=0;k<branching;++k) {
00648                 free_centers(node->childs[k]);
00649             }
00650         }
00651     }
00652 
00660         void computeNodeStatistics(KMeansNode node, int* indices, int indices_length) {
00661 
00662                 double radius = 0;
00663                 double variance = 0;
00664                 DIST_TYPE* mean = new DIST_TYPE[veclen_];
00665                 memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00666 
00667         memset(mean,0,veclen_*sizeof(float));
00668 
00669                 for (size_t i=0;i<size_;++i) {
00670                         ELEM_TYPE* vec = dataset[indices[i]];
00671             for (size_t j=0;j<veclen_;++j) {
00672                 mean[j] += vec[j];
00673             }
00674                         variance += flann_dist(vec,vec+veclen_,zero());
00675                 }
00676                 for (size_t j=0;j<veclen_;++j) {
00677                         mean[j] /= size_;
00678                 }
00679                 variance /= size_;
00680                 variance -= flann_dist(mean,mean+veclen_,zero());
00681 
00682                 double tmp = 0;
00683                 for (int i=0;i<indices_length;++i) {
00684                         tmp = flann_dist(mean, mean + veclen_, dataset[indices[i]]);
00685                         if (tmp>radius) {
00686                                 radius = tmp;
00687                         }
00688                 }
00689 
00690                 node->variance = (DIST_TYPE)variance;
00691                 node->radius = (DIST_TYPE)radius;
00692                 node->pivot = mean;
00693         }
00694 
00695 
00707         void computeClustering(KMeansNode node, int* indices, int indices_length, int branching, int level)
00708         {
00709                 node->size = indices_length;
00710                 node->level = level;
00711 
00712                 if (indices_length < branching) {
00713                         node->indices = indices;
00714             sort(node->indices,node->indices+indices_length);
00715             node->childs = NULL;
00716                         return;
00717                 }
00718 
00719                 int* centers_idx = new int[branching];
00720         int centers_length;
00721                 (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
00722 
00723                 if (centers_length<branching) {
00724             node->indices = indices;
00725             sort(node->indices,node->indices+indices_length);
00726             node->childs = NULL;
00727                         return;
00728                 }
00729 
00730 
00731         Matrix<double> dcenters(new double[branching*veclen_],branching,(long)veclen_);
00732         for (int i=0; i<centers_length; ++i) {
00733                 ELEM_TYPE* vec = dataset[centers_idx[i]];
00734             for (size_t k=0; k<veclen_; ++k) {
00735                 dcenters[i][k] = double(vec[k]);
00736             }
00737         }
00738                 delete[] centers_idx;
00739 
00740                 float* radiuses = new float[branching];
00741                 int* count = new int[branching];
00742         for (int i=0;i<branching;++i) {
00743             radiuses[i] = 0;
00744             count[i] = 0;
00745         }
00746 
00747         //      assign points to clusters
00748                 int* belongs_to = new int[indices_length];
00749                 for (int i=0;i<indices_length;++i) {
00750 
00751                         double sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]] + veclen_ ,dcenters[0]);
00752                         belongs_to[i] = 0;
00753                         for (int j=1;j<branching;++j) {
00754                                 double new_sq_dist = flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_, dcenters[j]);
00755                                 if (sq_dist>new_sq_dist) {
00756                                         belongs_to[i] = j;
00757                                         sq_dist = new_sq_dist;
00758                                 }
00759                         }
00760             if (sq_dist>radiuses[belongs_to[i]]) {
00761                 radiuses[belongs_to[i]] = (float)sq_dist;
00762             }
00763                         count[belongs_to[i]]++;
00764                 }
00765 
00766                 bool converged = false;
00767                 int iteration = 0;
00768                 while (!converged && iteration<max_iter) {
00769                         converged = true;
00770                         iteration++;
00771 
00772                         // compute the new cluster centers
00773                         for (int i=0;i<branching;++i) {
00774                 memset(dcenters[i],0,sizeof(double)*veclen_);
00775                 radiuses[i] = 0;
00776                         }
00777             for (int i=0;i<indices_length;++i) {
00778                                 ELEM_TYPE* vec = dataset[indices[i]];
00779                                 double* center = dcenters[belongs_to[i]];
00780                                 for (size_t k=0;k<veclen_;++k) {
00781                                         center[k] += vec[k];
00782                                 }
00783                         }
00784                         for (int i=0;i<branching;++i) {
00785                 int cnt = count[i];
00786                 for (size_t k=0;k<veclen_;++k) {
00787                     dcenters[i][k] /= cnt;
00788                 }
00789                         }
00790 
00791                         // reassign points to clusters
00792                         for (int i=0;i<indices_length;++i) {
00793                                 float sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_ ,dcenters[0]);
00794                                 int new_centroid = 0;
00795                                 for (int j=1;j<branching;++j) {
00796                                         float new_sq_dist = (float)flann_dist(dataset[indices[i]], dataset[indices[i]]+veclen_,dcenters[j]);
00797                                         if (sq_dist>new_sq_dist) {
00798                                                 new_centroid = j;
00799                                                 sq_dist = new_sq_dist;
00800                                         }
00801                                 }
00802                                 if (sq_dist>radiuses[new_centroid]) {
00803                                         radiuses[new_centroid] = sq_dist;
00804                                 }
00805                                 if (new_centroid != belongs_to[i]) {
00806                                         count[belongs_to[i]]--;
00807                                         count[new_centroid]++;
00808                                         belongs_to[i] = new_centroid;
00809 
00810                                         converged = false;
00811                                 }
00812                         }
00813 
00814                         for (int i=0;i<branching;++i) {
00815                                 // if one cluster converges to an empty cluster,
00816                                 // move an element into that cluster
00817                                 if (count[i]==0) {
00818                                         int j = (i+1)%branching;
00819                                         while (count[j]<=1) {
00820                                                 j = (j+1)%branching;
00821                                         }
00822 
00823                                         for (int k=0;k<indices_length;++k) {
00824                                                 if (belongs_to[k]==j) {
00825                                                         belongs_to[k] = i;
00826                                                         count[j]--;
00827                                                         count[i]++;
00828                                                         break;
00829                                                 }
00830                                         }
00831                                         converged = false;
00832                                 }
00833                         }
00834 
00835                 }
00836 
00837         DIST_TYPE** centers = new DIST_TYPE*[branching];
00838 
00839         for (int i=0; i<branching; ++i) {
00840                         centers[i] = new DIST_TYPE[veclen_];
00841                         memoryCounter += (int)(veclen_*sizeof(DIST_TYPE));
00842             for (size_t k=0; k<veclen_; ++k) {
00843                 centers[i][k] = (DIST_TYPE)dcenters[i][k];
00844             }
00845                 }
00846 
00847 
00848                 // compute kmeans clustering for each of the resulting clusters
00849                 node->childs = pool.allocate<KMeansNode>(branching);
00850                 int start = 0;
00851                 int end = start;
00852                 for (int c=0;c<branching;++c) {
00853                         int s = count[c];
00854 
00855                         double variance = 0;
00856                     double mean_radius =0;
00857                         for (int i=0;i<indices_length;++i) {
00858                                 if (belongs_to[i]==c) {
00859                                         double d = flann_dist(dataset[indices[i]],dataset[indices[i]]+veclen_,zero());
00860                                         variance += d;
00861                                         mean_radius += sqrt(d);
00862                                         swap(indices[i],indices[end]);
00863                                         swap(belongs_to[i],belongs_to[end]);
00864                                         end++;
00865                                 }
00866                         }
00867                         variance /= s;
00868                         mean_radius /= s;
00869                         variance -= flann_dist(centers[c],centers[c]+veclen_,zero());
00870 
00871                         node->childs[c] = pool.allocate<KMeansNodeSt>();
00872                         node->childs[c]->radius = radiuses[c];
00873                         node->childs[c]->pivot = centers[c];
00874                         node->childs[c]->variance = (float)variance;
00875                         node->childs[c]->mean_radius = (float)mean_radius;
00876                         node->childs[c]->indices = NULL;
00877                         computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
00878                         start=end;
00879                 }
00880 
00881                 delete[] dcenters.data;
00882                 delete[] centers;
00883                 delete[] radiuses;
00884                 delete[] count;
00885                 delete[] belongs_to;
00886         }
00887 
00888 
00889 
00903         void findNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec, int& checks, int maxChecks,
00904                         Heap<BranchSt>* heap)
00905         {
00906                 // Ignore those clusters that are too far away
00907                 {
00908                         DIST_TYPE bsq = (DIST_TYPE)flann_dist(vec, vec+veclen_, node->pivot);
00909                         DIST_TYPE rsq = node->radius;
00910                         DIST_TYPE wsq = result.worstDist();
00911 
00912                         DIST_TYPE val = bsq-rsq-wsq;
00913                         DIST_TYPE val2 = val*val-4*rsq*wsq;
00914 
00915                         //if (val>0) {
00916                         if (val>0 && val2>0) {
00917                                 return;
00918                         }
00919                 }
00920 
00921                 if (node->childs==NULL) {
00922             if (checks>=maxChecks) {
00923                 if (result.full()) return;
00924             }
00925             checks += node->size;
00926                         for (int i=0;i<node->size;++i) {
00927                                 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00928                         }
00929                 }
00930                 else {
00931                         float* domain_distances = new float[branching];
00932                         int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
00933                         delete[] domain_distances;
00934                         findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
00935                 }
00936         }
00937 
00946         int exploreNodeBranches(KMeansNode node, const ELEM_TYPE* q, float* domain_distances, Heap<BranchSt>* heap)
00947         {
00948 
00949                 int best_index = 0;
00950                 domain_distances[best_index] = (float)flann_dist(q,q+veclen_,node->childs[best_index]->pivot);
00951                 for (int i=1;i<branching;++i) {
00952                         domain_distances[i] = (float)flann_dist(q,q+veclen_,node->childs[i]->pivot);
00953                         if (domain_distances[i]<domain_distances[best_index]) {
00954                                 best_index = i;
00955                         }
00956                 }
00957 
00958 //              float* best_center = node->childs[best_index]->pivot;
00959                 for (int i=0;i<branching;++i) {
00960                         if (i != best_index) {
00961                                 domain_distances[i] -= cb_index*node->childs[i]->variance;
00962 
00963 //                              float dist_to_border = getDistanceToBorder(node.childs[i].pivot,best_center,q);
00964 //                              if (domain_distances[i]<dist_to_border) {
00965 //                                      domain_distances[i] = dist_to_border;
00966 //                              }
00967                                 heap->insert(BranchSt::make_branch(node->childs[i],domain_distances[i]));
00968                         }
00969                 }
00970 
00971                 return best_index;
00972         }
00973 
00974 
00978         void findExactNN(KMeansNode node, ResultSet<ELEM_TYPE>& result, const ELEM_TYPE* vec)
00979         {
00980                 // Ignore those clusters that are too far away
00981                 {
00982                         float bsq = (float)flann_dist(vec, vec+veclen_, node->pivot);
00983                         float rsq = node->radius;
00984                         float wsq = result.worstDist();
00985 
00986                         float val = bsq-rsq-wsq;
00987                         float val2 = val*val-4*rsq*wsq;
00988 
00989         //              if (val>0) {
00990                         if (val>0 && val2>0) {
00991                                 return;
00992                         }
00993                 }
00994 
00995 
00996                 if (node->childs==NULL) {
00997                         for (int i=0;i<node->size;++i) {
00998                                 result.addPoint(dataset[node->indices[i]], node->indices[i]);
00999                         }
01000                 }
01001                 else {
01002                         int* sort_indices = new int[branching];
01003 
01004                         getCenterOrdering(node, vec, sort_indices);
01005 
01006                         for (int i=0; i<branching; ++i) {
01007                                 findExactNN(node->childs[sort_indices[i]],result,vec);
01008                         }
01009 
01010                         delete[] sort_indices;
01011                 }
01012         }
01013 
01014 
01020         void getCenterOrdering(KMeansNode node, const ELEM_TYPE* q, int* sort_indices)
01021         {
01022                 float* domain_distances = new float[branching];
01023                 for (int i=0;i<branching;++i) {
01024                         float dist = (float)flann_dist(q, q+veclen_, node->childs[i]->pivot);
01025 
01026                         int j=0;
01027                         while (domain_distances[j]<dist && j<i) j++;
01028                         for (int k=i;k>j;--k) {
01029                                 domain_distances[k] = domain_distances[k-1];
01030                                 sort_indices[k] = sort_indices[k-1];
01031                         }
01032                         domain_distances[j] = dist;
01033                         sort_indices[j] = i;
01034                 }
01035                 delete[] domain_distances;
01036         }
01037 
01043         float getDistanceToBorder(float* p, float* c, float* q)
01044         {
01045                 float sum = 0;
01046                 float sum2 = 0;
01047 
01048                 for (int i=0;i<veclen_; ++i) {
01049                         float t = c[i]-p[i];
01050                         sum += t*(q[i]-(c[i]+p[i])/2);
01051                         sum2 += t*t;
01052                 }
01053 
01054                 return sum*sum/sum2;
01055         }
01056 
01057 
01067         int getMinVarianceClusters(KMeansNode root, KMeansNode* clusters, int clusters_length, float& varianceValue)
01068         {
01069                 int clusterCount = 1;
01070                 clusters[0] = root;
01071 
01072                 float meanVariance = root->variance*root->size;
01073 
01074                 while (clusterCount<clusters_length) {
01075                         float minVariance = numeric_limits<float>::max();
01076                         int splitIndex = -1;
01077 
01078                         for (int i=0;i<clusterCount;++i) {
01079                                 if (clusters[i]->childs != NULL) {
01080 
01081                                         float variance = meanVariance - clusters[i]->variance*clusters[i]->size;
01082 
01083                                         for (int j=0;j<branching;++j) {
01084                                                 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
01085                                         }
01086                                         if (variance<minVariance) {
01087                                                 minVariance = variance;
01088                                                 splitIndex = i;
01089                                         }
01090                                 }
01091                         }
01092 
01093                         if (splitIndex==-1) break;
01094                         if ( (branching+clusterCount-1) > clusters_length) break;
01095 
01096                         meanVariance = minVariance;
01097 
01098                         // split node
01099                         KMeansNode toSplit = clusters[splitIndex];
01100                         clusters[splitIndex] = toSplit->childs[0];
01101                         for (int i=1;i<branching;++i) {
01102                                 clusters[clusterCount++] = toSplit->childs[i];
01103                         }
01104                 }
01105 
01106                 varianceValue = meanVariance/root->size;
01107                 return clusterCount;
01108         }
01109 };
01110 
01111 
01112 
01113 //register_index(KMEANS,KMeansTree)
01114 
01115 } // namespace cvflann
01116 
01117 #endif //_OPENCV_KMEANSTREE_H_