libvlfeat: vl_gmm.c Source File

Go to the documentation of this file.
00001 
00006 /*
00007 Copyright (C) 2013 David Novotny.
00008 All rights reserved.
00009 
00010 This file is part of the VLFeat library and is made available under
00011 the terms of the BSD license (see the COPYING file).
00012 */
00013 
00014 #include <vl/gmm.h>
00015 #include <mexutils.h>
00016 #include <string.h>
00017 #include <stdio.h>
00018 
/* Numeric codes identifying the optional arguments of the MEX driver.
   The values are spelled out explicitly; they match the implicit
   0, 1, 2, ... numbering of a plain enumerator list. */
enum
{
  opt_max_num_iterations = 0,  /* "MaxNumIterations" */
  opt_distance           = 1,  /* no matching row in the option table visible here */
  opt_initialization     = 2,  /* "Initialization" / "Initialisation" */
  opt_num_repetitions    = 3,  /* "NumRepetitions" */
  opt_verbose            = 4,  /* "Verbose" */
  opt_means              = 5,  /* "InitMeans" */
  opt_covariances        = 6,  /* "InitCovariances" */
  opt_priors             = 7,  /* "InitPriors" */
  opt_covariance_bound   = 8   /* "CovarianceBound" */
} ;
00031 
00032 vlmxOption  options [] =
00033 {
00034   {"MaxNumIterations",  1,   opt_max_num_iterations  },
00035   {"Verbose",           0,   opt_verbose             },
00036   {"NumRepetitions",    1,   opt_num_repetitions,    },
00037   {"Initialization",    1,   opt_initialization      },
00038   {"Initialisation",    1,   opt_initialization      }, /* UK spelling */
00039   {"InitMeans",         1,   opt_means               },
00040   {"InitCovariances",   1,   opt_covariances         },
00041   {"InitPriors",        1,   opt_priors              },
00042   {"CovarianceBound",   1,   opt_covariance_bound    },
00043   {0,                   0,   0                       }
00044 } ;
00045 
00046 /* driver */
00047 void
00048 mexFunction (int nout, mxArray * out[], int nin, const mxArray * in[])
00049 {
00050   enum {IN_DATA = 0, IN_NUMCLUSTERS, IN_END} ;
00051   enum {OUT_MEANS, OUT_COVARIANCES, OUT_PRIORS, OUT_LL, OUT_POSTERIORS} ;
00052 
00053   int opt ;
00054   int next = IN_END ;
00055   mxArray const  *optarg ;
00056 
00057   vl_size i;
00058 
00059   vl_size numClusters = 10;
00060   vl_size dimension ;
00061   vl_size numData ;
00062 
00063   void * initCovariances = 0 ;
00064   void * initMeans = 0 ;
00065   void * initPriors = 0 ;
00066 
00067   double covarianceScalarBound = VL_NAN_D ;
00068   double const * covarianceBound = NULL ;
00069   void const * data = NULL ;
00070 
00071   vl_size maxNumIterations = 100 ;
00072   vl_size numRepetitions = 1 ;
00073   double LL ;
00074   int verbosity = 0 ;
00075   VlGMMInitialization initialization = VlGMMRand ;
00076   vl_bool initializationSet = VL_FALSE ;
00077 
00078   vl_type dataType ;
00079   mxClassID classID ;
00080 
00081   VlGMM * gmm ;
00082 
00083   VL_USE_MATLAB_ENV ;
00084 
00085   /* -----------------------------------------------------------------
00086    *                                               Check the arguments
00087    * -------------------------------------------------------------- */
00088 
00089   if (nin < 2)
00090   {
00091     vlmxError (vlmxErrInvalidArgument,
00092                "At least two arguments required.");
00093   }
00094   else if (nout > 5)
00095   {
00096     vlmxError (vlmxErrInvalidArgument,
00097                "Too many output arguments.");
00098   }
00099 
00100   classID = mxGetClassID (IN(DATA)) ;
00101   switch (classID) {
00102     case mxSINGLE_CLASS: dataType = VL_TYPE_FLOAT ; break ;
00103     case mxDOUBLE_CLASS: dataType = VL_TYPE_DOUBLE ; break ;
00104     default:
00105       vlmxError (vlmxErrInvalidArgument,
00106                  "DATA is neither of class SINGLE or DOUBLE.") ;
00107       abort() ;
00108   }
00109 
00110   dimension = mxGetM (IN(DATA)) ;
00111   numData = mxGetN (IN(DATA)) ;
00112 
00113   if (dimension == 0)
00114   {
00115     vlmxError (vlmxErrInvalidArgument, "SIZE(DATA,1) is zero.") ;
00116   }
00117 
00118   if (!vlmxIsPlainScalar(IN(NUMCLUSTERS)) ||
00119       (numClusters = (vl_size) mxGetScalar(IN(NUMCLUSTERS))) < 1  ||
00120       numClusters > numData)
00121   {
00122     vlmxError (vlmxErrInvalidArgument,
00123                "NUMCLUSTERS must be a positive integer not greater "
00124                "than the number of data.") ;
00125   }
00126 
00127   while ((opt = vlmxNextOption (in, nin, options, &next, &optarg)) >= 0)
00128   {
00129     char buf [1024] ;
00130 
00131     switch (opt)
00132     {
00133       case opt_verbose : ++ verbosity ; break ;
00134 
00135       case opt_max_num_iterations :
00136         if (!vlmxIsPlainScalar(optarg) || mxGetScalar(optarg) < 0) {
00137           vlmxError (vlmxErrInvalidArgument,
00138                      "MAXNUMITERATIONS must be a non-negative integer scalar") ;
00139         }
00140         maxNumIterations = (vl_size) mxGetScalar(optarg) ;
00141         break ;
00142 
00143       case opt_covariance_bound :
00144         if (vlmxIsPlainScalar(optarg)) {
00145           covarianceScalarBound = mxGetScalar(optarg) ;
00146           continue ;
00147         }
00148         if (!vlmxIsPlainVector(optarg,dimension)) {
00149           vlmxError (vlmxErrInvalidArgument,
00150                      "COVARIANCEBOUND must be a DOUBLE vector of size "
00151                      "equal to the dimension of the data X.") ;
00152         }
00153         covarianceBound = mxGetPr(optarg) ;
00154         break ;
00155 
00156       case opt_priors : {
00157         if (mxGetClassID (optarg) != mxGetClassID(IN(DATA))) {
00158           vlmxError (vlmxErrInvalidArgument, "INITPRIORS is not of the same class as the data X.") ;
00159         }
00160         if (! vlmxIsVector (optarg, numClusters) || ! vlmxIsReal (optarg)) {
00161           vlmxError(vlmxErrInvalidArgument, "INITPRIORS is not a real vector or does not have the correct size.") ;
00162         }
00163         initPriors = mxGetPr(optarg) ;
00164         break ;
00165       }
00166 
00167       case opt_means : {
00168         if (mxGetClassID (optarg) != mxGetClassID(IN(DATA))) {
00169           vlmxError (vlmxErrInvalidArgument, "INITMEANS is not of the same class as the data X.") ;
00170         }
00171         if (! vlmxIsMatrix (optarg, dimension, numClusters) || ! vlmxIsReal (optarg)) {
00172           vlmxError(vlmxErrInvalidArgument, "INITMEANS is not a real matrix or does not have the correct size.") ;
00173         }
00174         initMeans = mxGetPr(optarg) ;
00175         break;
00176       }
00177 
00178       case opt_covariances : {
00179         if (mxGetClassID (optarg) != mxGetClassID(IN(DATA))) {
00180           vlmxError (vlmxErrInvalidArgument, "INITCOVARIANCES is not of the same class as the data X.") ;
00181         }
00182         if (! vlmxIsMatrix (optarg, dimension, numClusters) || ! vlmxIsReal (optarg)) {
00183           vlmxError(vlmxErrInvalidArgument, "INITCOVARIANCES is not a real matrix or does not have the correct size.") ;
00184         }
00185         initCovariances = mxGetPr(optarg) ;
00186         break;
00187       }
00188 
00189       case opt_initialization :
00190         if (!vlmxIsString (optarg, -1))
00191         {
00192           vlmxError (vlmxErrInvalidArgument,
00193                      "INITLAIZATION must be a string.") ;
00194         }
00195         if (mxGetString (optarg, buf, sizeof(buf)))
00196         {
00197           vlmxError (vlmxErrInvalidArgument,
00198                      "INITIALIZATION argument too long.") ;
00199         }
00200         if (vlmxCompareStringsI("rand", buf) == 0) {
00201           initialization = VlGMMRand ;
00202         }
00203         else if (vlmxCompareStringsI("custom", buf) == 0) {
00204           initialization = VlGMMCustom ;
00205         }
00206         else if (vlmxCompareStringsI("kmeans", buf) == 0) {
00207           initialization = VlGMMKMeans ;
00208         }
00209         else {
00210           vlmxError (vlmxErrInvalidArgument,
00211                      "Invalid value '%s' for INITIALIZATION.", buf) ;
00212         }
00213         initializationSet = VL_TRUE ;
00214         break ;
00215 
00216       case opt_num_repetitions :
00217         if (!vlmxIsPlainScalar (optarg)) {
00218           vlmxError (vlmxErrInvalidArgument,
00219                      "NUMREPETITIONS is not a scalar.") ;
00220         }
00221         if (mxGetScalar (optarg) < 1) {
00222           vlmxError (vlmxErrInvalidArgument,
00223                      "NUMREPETITIONS is not larger than or equal to 1.") ;
00224         }
00225         numRepetitions = (vl_size) mxGetScalar (optarg) ;
00226         break ;
00227 
00228       default :
00229         abort() ;
00230         break ;
00231     }
00232   }
00233 
00234   /* -----------------------------------------------------------------
00235    *                                                        Do the job
00236    * -------------------------------------------------------------- */
00237 
00238   data = mxGetPr(IN(DATA)) ;
00239 
00240   switch(dataType){
00241     case VL_TYPE_DOUBLE:
00242       for(i = 0; i < numData*dimension; i++) {
00243         double datum = *((double*)data + i);
00244         if(!(datum < VL_INFINITY_D && datum > -VL_INFINITY_D)){
00245           vlmxError (vlmxErrInvalidArgument,
00246                      "DATA contains NaNs or Infs.") ;
00247         }
00248       }
00249       break;
00250     case VL_TYPE_FLOAT:
00251       for(i = 0; i < numData*dimension; i++) {
00252         float datum = *((float*)data + i);
00253         if(!(datum < VL_INFINITY_F && datum > -VL_INFINITY_F)){
00254           vlmxError (vlmxErrInvalidArgument,
00255                      "DATA contains NaNs or Infs.") ;
00256         }
00257       }
00258       break;
00259     default:
00260       abort();
00261       break;
00262   }
00263 
00264   if (initPriors || initMeans || initCovariances) {
00265     if (!initPriors || !initMeans || !initCovariances) {
00266       vlmxError (vlmxErrInvalidArgument,
00267                  "All or none of INITPRIORS, INITMEANS, "
00268                  "INITCOVARIANCES must be set.") ;
00269     }
00270     if (initializationSet && initialization != VlGMMCustom) {
00271       vlmxError (vlmxErrInvalidArgument,
00272                  "INITPRIORS, INITMEANS, and "
00273                  "INITCOVARIANCES requires 'custom' INITALIZATION.") ;
00274     }
00275     initialization = VlGMMCustom ;
00276   }
00277 
00278   gmm = vl_gmm_new (dataType, dimension, numClusters) ;
00279   vl_gmm_set_verbosity (gmm, verbosity) ;
00280   vl_gmm_set_num_repetitions (gmm, numRepetitions) ;
00281   vl_gmm_set_max_num_iterations (gmm, maxNumIterations) ;
00282   vl_gmm_set_initialization (gmm, initialization) ;
00283 
00284   if (!vl_is_nan_d(covarianceScalarBound)) {
00285     vl_gmm_set_covariance_lower_bound (gmm, covarianceScalarBound) ;
00286   }
00287   if (covarianceBound) {
00288     vl_gmm_set_covariance_lower_bounds (gmm, covarianceBound) ;
00289   }
00290   if (initPriors) {
00291     vl_gmm_set_priors(gmm, initPriors) ;
00292   }
00293   if (initMeans) {
00294     vl_gmm_set_means(gmm, initMeans) ;
00295   }
00296   if (initCovariances) {
00297     vl_gmm_set_covariances(gmm, initCovariances) ;
00298   }
00299 
00300   if (verbosity) {
00301     char const * initializationName = 0 ;
00302 
00303     switch (vl_gmm_get_initialization(gmm)) {
00304       case VlGMMRand : initializationName = "rand" ; break ;
00305       case VlGMMKMeans : initializationName = "kmeans" ; break ;
00306       case VlGMMCustom : initializationName = "custom" ; break ;
00307       default: abort() ;
00308     }
00309 
00310     mexPrintf("vl_gmm: initialization = %s\n", initializationName) ;
00311     mexPrintf("vl_gmm: maxNumIterations = %d\n", vl_gmm_get_max_num_iterations(gmm)) ;
00312     mexPrintf("vl_gmm: numRepetitions = %d\n", vl_gmm_get_num_repetitions(gmm)) ;
00313     mexPrintf("vl_gmm: data type = %s\n", vl_get_type_name(vl_gmm_get_data_type(gmm))) ;
00314     mexPrintf("vl_gmm: data dimension = %d\n", dimension) ;
00315     mexPrintf("vl_gmm: num. data points = %d\n", numData) ;
00316     mexPrintf("vl_gmm: num. Gaussian modes = %d\n", numClusters) ;
00317     mexPrintf("vl_gmm: lower bound on covariance = [") ;
00318     if (dimension < 3) {
00319       for (i = 0 ; i < dimension ; ++i) {
00320         mexPrintf(" %f", vl_gmm_get_covariance_lower_bounds(gmm)[i]) ;
00321       }
00322     } else {
00323       mexPrintf(" %f %f ... %f",
00324                 vl_gmm_get_covariance_lower_bounds(gmm)[0],
00325                 vl_gmm_get_covariance_lower_bounds(gmm)[1],
00326                 vl_gmm_get_covariance_lower_bounds(gmm)[dimension-1]) ;
00327     }
00328     mexPrintf("]\n") ;
00329   }
00330 
00331   /* -------------------------------------------------------------- */
00332   /*                                                     Clustering */
00333   /* -------------------------------------------------------------- */
00334 
00335   LL = vl_gmm_cluster(gmm, data, numData) ;
00336 
00337   /* copy centers */
00338   OUT(MEANS) = mxCreateNumericMatrix (dimension, numClusters, classID, mxREAL) ;
00339   OUT(COVARIANCES) = mxCreateNumericMatrix (dimension, numClusters, classID, mxREAL) ;
00340   OUT(PRIORS) = mxCreateNumericMatrix (numClusters, 1, classID, mxREAL) ;
00341   OUT(POSTERIORS) = mxCreateNumericMatrix (numClusters, numData, classID, mxREAL) ;
00342 
00343   memcpy (mxGetData(OUT(MEANS)),
00344           vl_gmm_get_means (gmm),
00345           vl_get_type_size (dataType) * dimension * vl_gmm_get_num_clusters(gmm)) ;
00346 
00347   memcpy (mxGetData(OUT(COVARIANCES)),
00348           vl_gmm_get_covariances (gmm),
00349           vl_get_type_size (dataType) * dimension * vl_gmm_get_num_clusters(gmm)) ;
00350 
00351   memcpy (mxGetData(OUT(PRIORS)),
00352           vl_gmm_get_priors (gmm),
00353           vl_get_type_size (dataType) * vl_gmm_get_num_clusters(gmm)) ;
00354 
00355   /* optionally return loglikelihood */
00356   if (nout > 3) {
00357     OUT(LL) = vlmxCreatePlainScalar (LL) ;
00358   }
00359 
00360   /* optionally return posterior probabilities */
00361   if (nout > 4) {
00362     memcpy (mxGetData(OUT(POSTERIORS)),
00363             vl_gmm_get_posteriors (gmm),
00364             vl_get_type_size (dataType) * numData * vl_gmm_get_num_clusters(gmm)) ;
00365   }
00366 
00367   vl_gmm_delete (gmm) ;
00368 }