Integer K-Means clustering. More...


Go to the source code of this file.
| Classes | |
| struct | _VlIKMFilt | 
| IKM quantizer.  More... | |
| Defines | |
| #define | VL_IKMACC_MAX 0x7fffffffUL | 
| Typedefs | |
| typedef vl_int32 | vl_ikmacc_t | 
| typedef struct _VlIKMFilt | VlIKMFilt | 
| IKM quantizer. | |
| Enumerations | |
| enum | VlIKMAlgorithms { VL_IKM_LLOYD, VL_IKM_ELKAN } | 
| IKM algorithms.  More... | |
| Functions | |
| Create and destroy | |
| VL_EXPORT VlIKMFilt * | vl_ikm_new (int method) | 
| Create a new IKM quantizer. | |
| VL_EXPORT void | vl_ikm_delete (VlIKMFilt *f) | 
| Delete IKM quantizer. | |
| Process data | |
| VL_EXPORT void | vl_ikm_init (VlIKMFilt *f, vl_ikmacc_t const *centers, vl_size M, vl_size K) | 
| VL_EXPORT void | vl_ikm_init_rand (VlIKMFilt *f, vl_size M, vl_size K) | 
| VL_EXPORT void | vl_ikm_init_rand_data (VlIKMFilt *f, vl_uint8 const *data, vl_size M, vl_size N, vl_size K) | 
| VL_EXPORT int | vl_ikm_train (VlIKMFilt *f, vl_uint8 const *data, vl_size N) | 
| Train clusters. | |
| VL_EXPORT void | vl_ikm_push (VlIKMFilt *f, vl_uint32 *asgn, vl_uint8 const *data, vl_size N) | 
| Project data to clusters. | |
| VL_EXPORT vl_uint | vl_ikm_push_one (vl_ikmacc_t const *centers, vl_uint8 const *data, vl_size M, vl_size K) | 
| Project one datum to clusters. | |
| Retrieve data and parameters | |
| VL_EXPORT vl_size | vl_ikm_get_ndims (VlIKMFilt const *f) | 
| Get data dimensionality. | |
| VL_EXPORT vl_size | vl_ikm_get_K (VlIKMFilt const *f) | 
| Get the number of centers K. | |
| VL_EXPORT int | vl_ikm_get_verbosity (VlIKMFilt const *f) | 
| Get verbosity level. | |
| VL_EXPORT vl_size | vl_ikm_get_max_niters (VlIKMFilt const *f) | 
| Get maximum number of iterations. | |
| VL_EXPORT vl_ikmacc_t const * | vl_ikm_get_centers (VlIKMFilt const *f) | 
| Get maximum number of iterations. | |
| Set parameters | |
| VL_EXPORT void | vl_ikm_set_verbosity (VlIKMFilt *f, int verb) | 
| Set verbosity level. | |
| VL_EXPORT void | vl_ikm_set_max_niters (VlIKMFilt *f, vl_size max_niters) | 
| Set maximum number of iterations. | |
Integer K-Means clustering.
Integer K-means (IKM) is an implementation of K-means clustering (or Vector Quantization, VQ) for integer data. This is particularly useful for clustering large collections of visual descriptors.
Use the function vl_ikm_new() to create a IKM quantizer. Initialize the IKM quantizer with K clusters by vl_ikm_init() or similar function. Use vl_ikm_train() to train the quantizer. Use vl_ikm_push() or vl_ikm_push_one() to quantize new data.
Given data  and a number of clusters
 and a number of clusters  , the goal is to find assignments
, the goal is to find assignments  and centers
 and centers  so that the expected distortion
 so that the expected distortion
![\[ E(\{a_{i}, c_j\}) = \frac{1}{N} \sum_{i=1}^N d(x_i, c_{a_i}) \]](form_87.png) 
is minimized. Here  is the distortion, i.e. the cost we pay for representing
 is the distortion, i.e. the cost we pay for representing  by
 by  . IKM uses the squared distortion
. IKM uses the squared distortion  .
.
Most K-means algorithms are iterative and needs an initialization in the form of an initial choice of the centers  . We include the following options:
. We include the following options:
K randomly selected data points (vl_ikm_init_rand_data).The Lloyd (also known as Lloyd-Max and LBG) algorithm iteratively:
This algorithm is not particularly efficient because all data points need to be compared to all centers, for a complexity  , where T is the total number of iterations.
, where T is the total number of iterations.
The Elkan algorithm is an optimized variant of Lloyd. By making use of the triangle inequality, many comparisons of data points and centers are avoided, especially at later iterations. Usually 4-5 times less comparisons than Lloyd are preformed, providing a dramatic speedup in the execution time.
Definition in file ikmeans.h.
| #define VL_IKMACC_MAX 0x7fffffffUL | 
| typedef vl_int32 vl_ikmacc_t | 
| typedef struct _VlIKMFilt VlIKMFilt | 
IKM quantizer.
------------------------------------------------------------------
| enum VlIKMAlgorithms | 
| VL_EXPORT void vl_ikm_delete | ( | VlIKMFilt * | f | ) | 
| VL_EXPORT vl_ikmacc_t const* vl_ikm_get_centers | ( | VlIKMFilt const * | f | ) | 
| VL_EXPORT vl_size vl_ikm_get_K | ( | VlIKMFilt const * | f | ) | 
| VL_EXPORT vl_size vl_ikm_get_max_niters | ( | VlIKMFilt const * | f | ) | 
| VL_EXPORT vl_size vl_ikm_get_ndims | ( | VlIKMFilt const * | f | ) | 
| VL_EXPORT int vl_ikm_get_verbosity | ( | VlIKMFilt const * | f | ) | 
| VL_EXPORT void vl_ikm_init | ( | VlIKMFilt * | f, | 
| vl_ikmacc_t const * | centers, | ||
| vl_size | M, | ||
| vl_size | K | ||
| ) | 
| VL_EXPORT void vl_ikm_init_rand | ( | VlIKMFilt * | f, | 
| vl_size | M, | ||
| vl_size | K | ||
| ) | 
| VL_EXPORT void vl_ikm_init_rand_data | ( | VlIKMFilt * | f, | 
| vl_uint8 const * | data, | ||
| vl_size | M, | ||
| vl_size | N, | ||
| vl_size | K | ||
| ) | 
| VL_EXPORT VlIKMFilt* vl_ikm_new | ( | int | method | ) | 
Create a new IKM quantizer.
| method | Clustering algorithm. | 
The function allocates initializes a new IKM quantizer to operate based algorithm method.
method has values in the enumerations VlIKMAlgorithms.
Project data to clusters.
| f | IKM quantizer. | 
| asgn | Assignments (out). | 
| data | data. | 
| N | number of data (N >=1). | 
The function projects the data data on the integer K-means clusters specified by the IKM quantizer f. Notice that the quantizer must be initialized.
| VL_EXPORT vl_uint vl_ikm_push_one | ( | vl_ikmacc_t const * | centers, | 
| vl_uint8 const * | data, | ||
| vl_size | M, | ||
| vl_size | K | ||
| ) | 
| VL_EXPORT void vl_ikm_set_max_niters | ( | VlIKMFilt * | f, | 
| vl_size | max_niters | ||
| ) | 
| VL_EXPORT void vl_ikm_set_verbosity | ( | VlIKMFilt * | f, | 
| int | verb | ||
| ) |