#include <kmeans_index.h>
Classes | |
struct | KMeansNodeSt |
Public Member Functions | |
void | buildIndex () |
void | findNeighbors (ResultSet< ELEM_TYPE > &result, const ELEM_TYPE *vec, const SearchParams &searchParams) |
int | getClusterCenters (Matrix< DIST_TYPE > ¢ers) |
const IndexParams * | getParameters () const |
flann_algorithm_t | getType () const |
KMeansIndex (const Matrix< ELEM_TYPE > &inputData, const KMeansIndexParams ¶ms=KMeansIndexParams()) | |
void | loadIndex (FILE *stream) |
void | saveIndex (FILE *stream) |
void | set_cb_index (float index) |
size_t | size () const |
int | usedMemory () const |
size_t | veclen () const |
virtual | ~KMeansIndex () |
Private Types | |
typedef BranchStruct< KMeansNode > | BranchSt |
typedef void(KMeansIndex::* | centersAlgFunction )(int, int *, int, int *, int &) |
typedef KMeansNodeSt * | KMeansNode |
Private Member Functions | |
void | chooseCentersGonzales (int k, int *indices, int indices_length, int *centers, int ¢ers_length) |
void | chooseCentersKMeanspp (int k, int *indices, int indices_length, int *centers, int ¢ers_length) |
void | chooseCentersRandom (int k, int *indices, int indices_length, int *centers, int ¢ers_length) |
void | computeClustering (KMeansNode node, int *indices, int indices_length, int branching, int level) |
void | computeNodeStatistics (KMeansNode node, int *indices, int indices_length) |
int | exploreNodeBranches (KMeansNode node, const ELEM_TYPE *q, float *domain_distances, Heap< BranchSt > *heap) |
void | findExactNN (KMeansNode node, ResultSet< ELEM_TYPE > &result, const ELEM_TYPE *vec) |
void | findNN (KMeansNode node, ResultSet< ELEM_TYPE > &result, const ELEM_TYPE *vec, int &checks, int maxChecks, Heap< BranchSt > *heap) |
void | free_centers (KMeansNode node) |
void | getCenterOrdering (KMeansNode node, const ELEM_TYPE *q, int *sort_indices) |
float | getDistanceToBorder (float *p, float *c, float *q) |
int | getMinVarianceClusters (KMeansNode root, KMeansNode *clusters, int clusters_length, float &varianceValue) |
void | load_tree (FILE *stream, KMeansNode &node) |
void | save_tree (FILE *stream, KMeansNode node) |
Private Attributes | |
int | branching |
float | cb_index |
centersAlgFunction | chooseCenters |
const Matrix< ELEM_TYPE > | dataset |
const IndexParams & | index_params |
int * | indices |
int | max_iter |
int | memoryCounter |
PooledAllocator | pool |
KMeansNode | root |
size_t | size_ |
size_t | veclen_ |
Hierarchical kmeans index
Contains a tree constructed through a hierarchical kmeans clustering and other information for indexing a set of points for nearest-neighbour matching.
Definition at line 90 of file kmeans_index.h.
typedef BranchStruct<KMeansNode> cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::BranchSt [private] |
Alias definition for a nicer syntax.
Definition at line 174 of file kmeans_index.h.
typedef void(KMeansIndex::* cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::centersAlgFunction)(int, int *, int, int *, int &) [private] |
Definition at line 203 of file kmeans_index.h.
typedef KMeansNodeSt* cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::KMeansNode [private] |
Definition at line 167 of file kmeans_index.h.
cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::KMeansIndex | ( | const Matrix< ELEM_TYPE > & | inputData, | |
const KMeansIndexParams & | params = KMeansIndexParams() | |||
) | [inline] |
Index constructor
Params: inputData = dataset with the input features params = parameters passed to the hierarchical k-means algorithm
Definition at line 394 of file kmeans_index.h.
virtual cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::~KMeansIndex | ( | ) | [inline, virtual] |
Index destructor.
Release the memory used by the index.
Definition at line 431 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::buildIndex | ( | ) | [inline, virtual] |
Builds the index
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 476 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::chooseCentersGonzales | ( | int | k, | |
int * | indices, | |||
int | indices_length, | |||
int * | centers, | |||
int & | centers_length | |||
) | [inline, private] |
Chooses the initial centers in the k-means using Gonzales' algorithm so that the centers are spaced apart from each other.
Params: k = number of centers vecs = the dataset of points indices = indices in the dataset Returns:
Definition at line 263 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::chooseCentersKMeanspp | ( | int | k, | |
int * | indices, | |||
int | indices_length, | |||
int * | centers, | |||
int & | centers_length | |||
) | [inline, private] |
Chooses the initial centers in the k-means using the algorithm proposed in the KMeans++ paper: Arthur, David; Vassilvitskii, Sergei - k-means++: The Advantages of Careful Seeding
Implementation of this function was converted from the one provided in Arthur's code.
Params: k = number of centers vecs = the dataset of points indices = indices in the dataset Returns:
Definition at line 314 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::chooseCentersRandom | ( | int | k, | |
int * | indices, | |||
int | indices_length, | |||
int * | centers, | |||
int & | centers_length | |||
) | [inline, private] |
Chooses the initial centers in the k-means clustering in a random manner.
Params: k = number of centers vecs = the dataset of points indices = indices in the dataset indices_length = length of indices vector
Definition at line 222 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::computeClustering | ( | KMeansNode | node, | |
int * | indices, | |||
int | indices_length, | |||
int | branching, | |||
int | level | |||
) | [inline, private] |
The method responsible with actually doing the recursive hierarchical clustering
Params: node = the node to cluster indices = indices of the points belonging to the current node branching = the branching factor to use in the clustering
TODO: for 1-sized clusters don't store a cluster center (it's the same as the single cluster point)
Definition at line 707 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::computeNodeStatistics | ( | KMeansNode | node, | |
int * | indices, | |||
int | indices_length | |||
) | [inline, private] |
Computes the statistics of a node (mean, radius, variance).
Params: node = the node to use indices = the indices of the points belonging to the node
Definition at line 660 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::exploreNodeBranches | ( | KMeansNode | node, | |
const ELEM_TYPE * | q, | |||
float * | domain_distances, | |||
Heap< BranchSt > * | heap | |||
) | [inline, private] |
Helper function that computes the nearest childs of a node to a given query point. Params: node = the node q = the query point distances = array with the distances to each child node. Returns:
Definition at line 946 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::findExactNN | ( | KMeansNode | node, | |
ResultSet< ELEM_TYPE > & | result, | |||
const ELEM_TYPE * | vec | |||
) | [inline, private] |
Function the performs exact nearest neighbor search by traversing the entire tree.
Definition at line 978 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::findNeighbors | ( | ResultSet< ELEM_TYPE > & | result, | |
const ELEM_TYPE * | vec, | |||
const SearchParams & | searchParams | |||
) | [inline, virtual] |
Find set of nearest neighbors to vec. Their indices are stored inside the result object.
Params: result = the result object in which the indices of the nearest-neighbors are stored vec = the vector for which to search the nearest neighbors searchParams = parameters that influence the search algorithm (checks, cb_index)
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 533 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::findNN | ( | KMeansNode | node, | |
ResultSet< ELEM_TYPE > & | result, | |||
const ELEM_TYPE * | vec, | |||
int & | checks, | |||
int | maxChecks, | |||
Heap< BranchSt > * | heap | |||
) | [inline, private] |
Performs one descent in the hierarchical k-means tree. The branches not visited are stored in a priority queue.
Params: node = node to explore result = container for the k-nearest neighbors found vec = query points checks = how many points in the dataset have been checked so far maxChecks = maximum dataset points to checks
Definition at line 903 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::free_centers | ( | KMeansNode | node | ) | [inline, private] |
Helper function
Definition at line 643 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::getCenterOrdering | ( | KMeansNode | node, | |
const ELEM_TYPE * | q, | |||
int * | sort_indices | |||
) | [inline, private] |
Helper function.
I computes the order in which to traverse the child nodes of a particular node.
Definition at line 1020 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::getClusterCenters | ( | Matrix< DIST_TYPE > & | centers | ) | [inline] |
Clustering function that takes a cut in the hierarchical k-means tree and return the clusters centers of that clustering. Params: numClusters = number of clusters to have in the clustering computed Returns: number of cluster centers
Definition at line 569 of file kmeans_index.h.
float cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::getDistanceToBorder | ( | float * | p, | |
float * | c, | |||
float * | q | |||
) | [inline, private] |
Method that computes the squared distance from the query point q from inside region with center c to the border between this region and the region with center p
Definition at line 1043 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::getMinVarianceClusters | ( | KMeansNode | root, | |
KMeansNode * | clusters, | |||
int | clusters_length, | |||
float & | varianceValue | |||
) | [inline, private] |
Helper function the descends in the hierarchical k-means tree by spliting those clusters that minimize the overall variance of the clustering. Params: root = root node clusters = array with clusters centers (return value) varianceValue = variance of the clustering (return value) Returns:
Definition at line 1067 of file kmeans_index.h.
const IndexParams* cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::getParameters | ( | ) | const [inline, virtual] |
Returns the parameters used for the index
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 595 of file kmeans_index.h.
flann_algorithm_t cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::getType | ( | ) | const [inline, virtual] |
Algorithm name
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 382 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::load_tree | ( | FILE * | stream, | |
KMeansNode & | node | |||
) | [inline, private] |
Definition at line 620 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::loadIndex | ( | FILE * | stream | ) | [inline, virtual] |
Loads the index from a stream
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 505 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::save_tree | ( | FILE * | stream, | |
KMeansNode | node | |||
) | [inline, private] |
Definition at line 604 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::saveIndex | ( | FILE * | stream | ) | [inline, virtual] |
Saves the index to a stream
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 493 of file kmeans_index.h.
void cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::set_cb_index | ( | float | index | ) | [inline] |
Definition at line 458 of file kmeans_index.h.
size_t cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::size | ( | ) | const [inline, virtual] |
Returns size of index.
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 444 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::usedMemory | ( | ) | const [inline, virtual] |
Computes the inde memory usage Returns: memory used by the index
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 468 of file kmeans_index.h.
size_t cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::veclen | ( | ) | const [inline, virtual] |
Returns the length of an index feature.
Implements cvflann::NNIndex< ELEM_TYPE >.
Definition at line 452 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::branching [private] |
The branching factor used in the hierarchical k-means clustering
Definition at line 96 of file kmeans_index.h.
float cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::cb_index [private] |
Cluster border index. This is used in the tree search phase when determining the closest cluster to explore next. A zero value takes into account only the cluster centres, a value greater then zero also take into account the size of the cluster.
Definition at line 110 of file kmeans_index.h.
centersAlgFunction cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::chooseCenters [private] |
The function used for choosing the cluster centers.
Definition at line 208 of file kmeans_index.h.
const Matrix<ELEM_TYPE> cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::dataset [private] |
The dataset used by this index
Definition at line 115 of file kmeans_index.h.
const IndexParams& cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::index_params [private] |
Definition at line 117 of file kmeans_index.h.
int* cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::indices [private] |
Array of indices to vectors in the dataset.
Definition at line 185 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::max_iter [private] |
Maximum number of iterations to use when performing k-means clustering
Definition at line 102 of file kmeans_index.h.
int cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::memoryCounter [private] |
Memory occupied by the index.
Definition at line 200 of file kmeans_index.h.
PooledAllocator cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::pool [private] |
Pooled memory allocator.
Using a pooled memory allocator is more efficient than allocating memory directly when there is a large number small of memory allocations.
Definition at line 195 of file kmeans_index.h.
KMeansNode cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::root [private] |
The root node in the tree.
Definition at line 180 of file kmeans_index.h.
size_t cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::size_ [private] |
Number of features in the dataset.
Definition at line 122 of file kmeans_index.h.
size_t cvflann::KMeansIndex< ELEM_TYPE, DIST_TYPE >::veclen_ [private] |
Length of each feature.
Definition at line 127 of file kmeans_index.h.