tabletop_pushing: gmm.h Source File

Go to the documentation of this file.
00001 
00007 #ifndef GMM_H
00008 #define GMM_H
00009 
00010 #define _USE_MATH_DEFINES
00011 
00012 #include <math.h>
00013 #include <time.h>
00014 #include <opencv2/core/core.hpp>
00015 #include <vector>
00016 #include <limits>
00017 #include <iostream>
00018 #include <fstream>
00019 #include <boost/filesystem.hpp>
00020 #include <boost/format.hpp>
00021 
00022 namespace fs = boost::filesystem;
00023 
00024 #define NUM_GMM_DIMS 3
00025 #define HALF_NUM_GMM_DIMS (float)NUM_GMM_DIMS/2
00026 #define NUM_SIGMA_VALS (NUM_GMM_DIMS*(NUM_GMM_DIMS+1))/2
00027 
00028 typedef cv::Vec<float, NUM_GMM_DIMS> GMMFloatPnt;
00029 typedef cv::Vec<float, NUM_SIGMA_VALS> GMMSigmaVal;
00030 
00031 class Gaussian {
00032  private:
00033   cv::Mat imat;
00034   double prob_norm;
00035 
00036 
00037   void adjustProbNorm() {
00038     prob_norm = pow(2 * M_PI, HALF_NUM_GMM_DIMS) * sqrt(determinant(mat));
00039   }
00040 
00041 
00042   void adjustForPSD() {
00043     // All covariance matrix should be positive semi-definite (PSD).
00044     // All eigenvalues of a PSD matrix are positive. After doing the
00045     // eigen-decomposition, all small or negative eigenvalues are converted
00046     // turned positive - and the covariance matrix constructed back
00047 
00048     const float MIN_EIGENVALUE = 1e-2;
00049     const float EIGENVALUE_SCALING = 0.1;
00050 
00051     // This function supposes that atleast one eigenvalue is larger than MIN_VAL
00052 
00053     cv::Mat eigenvalues, eigenvectors;
00054     cv::eigen(mat, eigenvalues, eigenvectors);
00055 
00056     // transpose so each column is a eigenvector
00057     eigenvectors = eigenvectors.t();
00058 
00059     bool eigenval_change = false;
00060     for (int d = 1; d < NUM_GMM_DIMS; d++) {
00061       eigenval_change = eigenval_change || eigenvalues.at<float>(d) < MIN_EIGENVALUE;
00062     }
00063 
00064     // if the last NUM_GMM_DIMS-1 smallest eigenvalues are negative
00065     if (eigenval_change) {
00066       // take a small fraction of the next smallest positive eigenvalue as the
00067       // current eigenvalue (just a heurestic)
00068       for (int d = 1; d < NUM_GMM_DIMS; d++) {
00069         if (eigenvalues.at<float>(d) < MIN_EIGENVALUE)
00070           eigenvalues.at<float>(d) = eigenvalues.at<float>(d-1) * EIGENVALUE_SCALING;
00071         if (eigenvalues.at<float>(d) < MIN_EIGENVALUE)
00072           eigenvalues.at<float>(d) = MIN_EIGENVALUE;
00073       }
00074 
00075       // make a matrix with eigenvalues on the diagonal
00076       cv::Mat eigval_mat = cv::Mat::zeros(NUM_GMM_DIMS, NUM_GMM_DIMS, CV_32F);
00077       for (int d = 0; d < NUM_GMM_DIMS; d++)
00078         eigval_mat.at<float>(d,d) = eigenvalues.at<float>(d);
00079 
00080       // reconstruct the covariance matrix
00081       mat = eigenvectors * eigval_mat * eigenvectors.inv();
00082     }
00083   }
00084 
00085 
00086  public:
00087   // The minimum variance value controls what is the resolving power of the
00088   // GMM for far-away points. Consider that the sigma is diagonal with values
00089   // t i.e. all dimensions have the variance t. Considering that exp(-500)
00090   // is the minimum non-zero value (we can even go further) a machine can
00091   // compute, then a sample point will only have non-zero probability if its
00092   // l_2 square distance is less than 0.5*500*t.
00093   static const float MIN_GMM_VARIANCE = 0.0;
00094 
00095   GMMFloatPnt mv;
00096   cv::Mat mat;
00097 
00098   Gaussian() {
00099     mat = cv::Mat::eye(NUM_GMM_DIMS, NUM_GMM_DIMS, CV_32F);
00100     imat = cv::Mat(mat.inv());
00101 
00102     // get the probability normalization value
00103     adjustProbNorm();
00104 
00105     GMMFloatPnt temp;
00106     for (int d = 0; d < NUM_GMM_DIMS; d++)
00107       temp[d] = std::numeric_limits<float>::quiet_NaN();
00108     SetMean(temp);
00109   }
00110 
00111   void SetMean(const GMMFloatPnt& mean_val) {
00112     mv = mean_val;
00113   }
00114 
00115   GMMSigmaVal GetSigma()
00116   {
00117     GMMSigmaVal cur_sigma;
00118     // set all the values
00119     int curr_idx = 0;
00120     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00121       for (int e = d; e < NUM_GMM_DIMS; e++) {
00122         cur_sigma[curr_idx] = mat.at<float>(d, e);
00123         ++curr_idx;
00124       }
00125     }
00126     return cur_sigma;
00127   }
00128 
00129   void SetSigma(const GMMSigmaVal& gmm_sigma_val) {
00130     // set all the values
00131     int curr_idx = 0;
00132     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00133       for (int e = d; e < NUM_GMM_DIMS; e++) {
00134         mat.at<float>(d, e) = gmm_sigma_val[curr_idx];
00135         mat.at<float>(e, d) = gmm_sigma_val[curr_idx];
00136         ++curr_idx;
00137       }
00138     }
00139 
00140     // Note: variance values need to be positive
00141 
00142     // set the diagonal values
00143     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00144       if (mat.at<float>(d, d) < MIN_GMM_VARIANCE)
00145         mat.at<float>(d, d) = MIN_GMM_VARIANCE;
00146     }
00147 
00148     // perturb matrix if close to negative definite
00149     adjustForPSD();
00150 
00151     imat = cv::Mat(mat.inv());
00152 
00153     // get the probability normalization value
00154     adjustProbNorm();
00155   }
00156 
00157 
00158   void dispparams() const {
00159     for (int d = 0; d < NUM_GMM_DIMS-1; d++)
00160       std::cout << boost::format("%8.3f|") % mv[d];
00161     std::cout << boost::format("%8.3f || ") % mv[NUM_GMM_DIMS-1];
00162 
00163     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00164       std::cout << boost::format("%8.3f|") % mat.at<float>(d, d);
00165     }
00166 
00167     for (int d = 0; d < NUM_GMM_DIMS-1; d++) {
00168       for (int e = d+1; e < NUM_GMM_DIMS; e++) {
00169         std::cout << boost::format("%8.3f|") % mat.at<float>(d, e);
00170       }
00171     }
00172   }
00173 
00174 
00175   static GMMSigmaVal returnFixedSigma(const float& sigma) {
00176     // get the fixed sigma value which diagonal values sigma and off-diagonal 0s
00177     GMMSigmaVal temp_sigma_pnt;
00178     int curr_idx = 0;
00179 
00180     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00181       for (int e = d; e < NUM_GMM_DIMS; e++) {
00182         if (d == e)
00183           temp_sigma_pnt[curr_idx] = sigma;
00184         else
00185           temp_sigma_pnt[curr_idx] = 0;
00186         ++curr_idx;
00187       }
00188     }
00189 
00190     return temp_sigma_pnt;
00191   }
00192 
00193 
00194   float density(const GMMFloatPnt& x) const;
00195   float mahal(const GMMFloatPnt& x) const;
00196   void serialize(std::ofstream& fd) const;
00197   void deserialize(std::ifstream& fd);
00198 };
00199 
00200 
00201 class GMM {
00202  public:
00203   int nk;           // number of mixtures
00204   float *w;         // mixture weights / coefficients
00205   Gaussian *kernel; // the actual Gaussian kernels (would be an array of size nk)
00206   double em_thresh; // the max difference between two EM iterations before stopping
00207   double max_iter;  // maximum number of iterations if GMM doesn't converge
00208   double min_iter;  // minimum number of iterations if GMM is going to run regardless
00209 
00210 
00211   GMM(const double em_threshold=5.0e-2, const int max_iterations=50,
00212       const int min_iterations=5) : nk(0), w(NULL), kernel(NULL), em_thresh(em_threshold),
00213     max_iter(max_iterations), min_iter(min_iterations)
00214   {
00215   }
00216 
00217   GMM(const GMM& x) : nk(x.nk), em_thresh(x.em_thresh), max_iter(x.max_iter), min_iter(x.min_iter)
00218   {
00219     alloc(nk);
00220     for (int i = 0; i < nk; ++i)
00221     {
00222       kernel[i].SetMean(x.kernel[i].mv);
00223       kernel[i].SetSigma(x.kernel[i].GetSigma());
00224       w[i] = x.w[i];
00225     }
00226   }
00227 
00228   GMM& operator=(const GMM& x)
00229   {
00230     if (this != &x)
00231     {
00232       free();
00233       em_thresh = x.em_thresh;
00234       max_iter = x.max_iter;
00235       min_iter = x.min_iter;
00236       alloc(x.nk);
00237       for (int i = 0; i < nk; ++i)
00238       {
00239         kernel[i].SetMean(x.kernel[i].mv);
00240         kernel[i].SetSigma(x.kernel[i].GetSigma());
00241         w[i] = x.w[i];
00242       }
00243     }
00244     return *this;
00245   }
00246 
00247   ~GMM() {
00248     // dtor
00249     free();
00250   }
00251 
00252 
00253   void alloc(const int number_of_kernel) {
00254     // deallocate any previously allocated memory
00255     free();
00256 
00257     nk = number_of_kernel;
00258     kernel = new Gaussian[nk];
00259     w = new float[nk];
00260     for (int i = 0; i < nk; i++) {
00261       w[i] = 1.0 / (float) nk;
00262     }
00263   }
00264 
00265 
00266   void free() {
00267     if (nk != 0) {
00268       if (w != NULL)
00269         delete[] w;
00270       if (kernel != NULL)
00271         delete[] kernel;
00272 
00273       nk = 0;
00274     }
00275   }
00276 
00277 
00278   float probability(const GMMFloatPnt& x) const {
00279     float val = 0.0;
00280 
00281     for (int i = 0; i < nk; i++) {
00282       val += (w[i] * kernel[i].density(x));
00283     }
00284     return val;
00285   }
00286 
00287   float maxProbability(const GMMFloatPnt& x) const {
00288     float max_val = 0.0;
00289     for (int i = 0; i < nk; i++) {
00290       float val = kernel[i].density(x);
00291       if (val > max_val)
00292       {
00293         max_val = val;
00294       }
00295     }
00296     return max_val;
00297   }
00298 
00299 
00300 
00301   float grabCutLikelihood(const GMMFloatPnt& x) const {
00302 
00303     float min_dist = FLT_MAX;
00304     for (int i = 0; i < nk; i++)
00305     {
00306       float d_i = kernel[i].mahal(x);
00307       if (d_i < min_dist)
00308       {
00309         min_dist = d_i;
00310       }
00311     }
00312     return min_dist;
00313   }
00314 
00315 
00316   float minl2clustercenterdist(const GMMFloatPnt& x) const {
00317     float min_dist = std::numeric_limits<float>::max();
00318     float dist;
00319 
00320     for (int i = 0; i < nk; i++) {
00321       dist = 0;
00322       for (int d = 0; d < NUM_GMM_DIMS; d++) {
00323         dist += pow(kernel[i].mv[d] - x[d], 2);
00324       }
00325 
00326       if (dist < min_dist)
00327         min_dist = dist;
00328     }
00329 
00330     return pow(min_dist, 0.5);
00331   }
00332 
00333 
00334   void dispparams() const {
00335     std::cout << "kernel parameters\n";
00336     std::cout << "No.:   weight : ";
00337     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00338       std::cout << boost::format("|   m%c   ") % static_cast<char>('a'+d);
00339     }
00340     std::cout << " ||                 gaussian parameters..\n";
00341     std::cout << "                ";
00342     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00343       std::cout << "|        ";
00344     }
00345     std::cout << " || ";
00346     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00347       std::cout << boost::format("  s_%d%d  |") % d % d;
00348     }
00349 
00350     for (int d = 1; d <= NUM_GMM_DIMS-1; d++) {
00351       for (int e = d+1; e <= NUM_GMM_DIMS; e++) {
00352         std::cout << boost::format("  s_%d%d  |") % d % e;
00353       }
00354     }
00355     std::cout << std::endl;
00356 
00357     for (int i = 0; i < nk; i++) {
00358       std::cout << boost::format("%3d: %8.5f : |") % i % w[i];
00359       kernel[i].dispparams();
00360       std::cout << std::endl;
00361     }
00362   }
00363 
00364 
00365   int which_kernel(const GMMFloatPnt& x) const;
00366   double learn(const std::vector<GMMFloatPnt>& pts);
00367   double GmmEm(const std::vector<GMMFloatPnt>& pts);
00368   void kmeansInit(const std::vector<GMMFloatPnt>& pts, const float sigma);
00369   void initkernels(const std::vector<GMMFloatPnt>& pts, const float sigma);
00370   void savegmm(const fs::path& savefilepath) const;
00371   void loadgmm(const fs::path& loadfilepath);
00372 };
00373 
00374 #endif