tabletop_pushing: gmm.cpp Source File

Go to the documentation of this file.
00001 #include "gmm.h"
00002 
00003 
00004 float Gaussian::density(const GMMFloatPnt& x) const {
00005 
00006   cv::Mat p(NUM_GMM_DIMS, 1, CV_32F);
00007   cv::Mat pt(1, NUM_GMM_DIMS, CV_32F);
00008   float val;
00009 
00010   for (unsigned int d = 0; d < NUM_GMM_DIMS; d++)
00011     p.at<float>(d, 0) = x[d] - mv[d];
00012   memcpy(pt.data, p.data, NUM_GMM_DIMS*sizeof(float));
00013   cv::Mat v(pt * imat * p);
00014 
00015   val = (float) exp(-0.5 * v.at<float>(0, 0)) / prob_norm;
00016 
00017   return val;
00018 }
00019 
00027 float Gaussian::mahal(const GMMFloatPnt& x) const
00028 {
00029   // Compute the probability of the point (x,y,z), given the mean and
00030   // covariance of the kernel - PRML eq (2.43)
00031 
00032   cv::Mat p(NUM_GMM_DIMS, 1, CV_32F);
00033   cv::Mat pt(1, NUM_GMM_DIMS, CV_32F);
00034 
00035   for (unsigned int d = 0; d < NUM_GMM_DIMS; d++)
00036     p.at<float>(d, 0) = x[d] - mv[d];
00037   memcpy(pt.data, p.data, NUM_GMM_DIMS*sizeof(float));
00038   cv::Mat v(pt * imat * p);
00039   return v.at<float>(0, 0);
00040 }
00041 
00042 void Gaussian::serialize(std::ofstream& fd) const {
00043   int num_dims = NUM_GMM_DIMS;
00044 
00045   fd.write(reinterpret_cast<const char *>(&num_dims), sizeof(int));
00046 
00047   for (int i = 0; i < NUM_GMM_DIMS; i++) {
00048     fd.write(reinterpret_cast<const char *>(&(mv[i])), sizeof(float));
00049   }
00050 
00051   for (int d = 0; d < NUM_GMM_DIMS; d++) {
00052     for (int e = d; e < NUM_GMM_DIMS; e++) {
00053       fd.write(reinterpret_cast<const char *>(&(mat.at<float>(d, e))), sizeof(float));
00054     }
00055   }
00056 }
00057 
00058 
00059 void Gaussian::deserialize(std::ifstream& fd) {
00060   int num_dims;
00061 
00062   fd.read(reinterpret_cast<char *>(&num_dims), sizeof(int));
00063 
00064   if (num_dims != NUM_GMM_DIMS)
00065     throw "The number of dimensions for the Gaussian loaded is inconsistent with NUM_GMM_DIMS";
00066 
00067   for (int i = 0; i < NUM_GMM_DIMS; i++) {
00068     fd.read(reinterpret_cast<char *>(&(mv[i])), sizeof(float));
00069   }
00070 
00071   for (int d = 0; d < NUM_GMM_DIMS; d++) {
00072     for (int e = d; e < NUM_GMM_DIMS; e++) {
00073       fd.read(reinterpret_cast<char *>(&(mat.at<float>(d, e))), sizeof(float));
00074       mat.at<float>(e, d) = mat.at<float>(d, e);
00075     }
00076   }
00077 
00078   imat = cv::Mat(mat.inv());
00079 
00080   // set the probability normalization value
00081   adjustProbNorm();
00082 }
00083 
00084 
00085 int GMM::which_kernel(const GMMFloatPnt& x) const {
00086   // find the kernel in the GMM where the point has the maximum posterior
00087   // probability
00088 
00089   int maxk = 0;
00090   float maxprob = kernel[0].density(x);
00091   float prob;
00092 
00093   for (int i = 1; i < nk; i++) {
00094     prob = kernel[i].density(x);
00095     if (maxprob < prob) {
00096       maxk = i;
00097       maxprob = prob;
00098     }
00099   }
00100 
00101   return maxk;
00102 }
00103 
00104 
00105 double GMM::learn(const std::vector<GMMFloatPnt>& pts) {
00106   // Run Expectation Maximization to learn the GMM
00107   // also returns the log likelihood normalized by the number of pts
00108   // The comments refer to PRML Chapt 9
00109 
00110   // temporary variables for the M step
00111   GMMFloatPnt km;
00112   GMMSigmaVal ks;
00113   float N_k;
00114   float x, y, z;
00115   double likelihood;
00116   int curr_idx;
00117 
00118   float weight[nk];                   // stores the new mixing coefficient
00119                                       // (eq 9.26) - first compute N_k (the
00120                                       // effective number of points assigned to
00121                                       // cluster k) and divide by N at end.
00122   float **prob = new float*[nk];      // stores responsibilities of each data
00123                                       // point for every Gaussian kernel (NxK)
00124   for (int i = 0; i < nk; i++)
00125     prob[i] = new float[pts.size()];
00126   float *px = new float[pts.size()];
00127 
00128   // compute the denominator for the responsibility function - eq (9.23)
00129   for (unsigned int i = 0; i < pts.size(); i++) {
00130     px[i] = probability(pts[i]);
00131   }
00132 
00133   // compute the E and M step for each kernel (parallelizable)
00134   for (int k = 0; k < nk; k++) {
00135     weight[k] = 0.0;
00136 
00137     // compute Expectation (E step)
00138     for (unsigned int i = 0; i < pts.size(); i++) {
00139       // compute the prior * likelihood (for the responsibility posterior)
00140       // i.e. the numerator in eq (9.23)
00141       prob[k][i] = w[k] * kernel[k].density(pts[i]);
00142 
00143       // divide only if evidence is non-zero - eq (9.23)
00144       if (px[i] > 0.0)
00145         prob[k][i] /= px[i];
00146       else
00147         prob[k][i] = 0.0;
00148 
00149       // keep a running tally of N_k - eq (9.27)
00150       weight[k] += prob[k][i];
00151     }
00152 
00153     // compute the mixing coefficient - eq (9.26)
00154     weight[k] /= (float) pts.size();
00155 
00156     // numerator of eq (9.26)
00157     N_k = pts.size() * weight[k];
00158 
00159     // reset values to 0.0
00160     for (unsigned int d = 0; d < NUM_GMM_DIMS; d++)
00161       km[d] = 0.0;
00162     for (unsigned int s = 0; s < NUM_SIGMA_VALS; s++)
00163       ks[s] = 0.0;
00164 
00165     // compute Maximization (M step)
00166     for (unsigned int i = 0; i < pts.size(); i++) {
00167       // accumulate the new mean - eq (9.24)
00168       for (unsigned int d = 0; d < NUM_GMM_DIMS; d++) {
00169         km[d] += prob[k][i] * pts[i][d];
00170       }
00171 
00172       // accumulate the squared parts of eq (9.25)
00173       curr_idx = 0;
00174       for (int d = 0; d < NUM_GMM_DIMS; d++) {
00175         for (int e = d; e < NUM_GMM_DIMS; e++) {
00176           ks[curr_idx] += prob[k][i] * pts[i][d] * pts[i][e];
00177           ++curr_idx;
00178         }
00179       }
00180     }
00181 
00182     // compute the new mean - eq (9.24)
00183     for (int d = 0; d < NUM_GMM_DIMS; d++)
00184       km[d] /= N_k;
00185 
00186     // compute the covariance matrix (Cov(X,Y) = E[XY] - E[X]E[Y]) - eq (9.25)
00187     // also normalize the covariance
00188     for (unsigned int s = 0; s < NUM_SIGMA_VALS; s++)
00189       ks[s] /= N_k;
00190     curr_idx = 0;
00191     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00192       for (int e = d; e < NUM_GMM_DIMS; e++) {
00193         ks[curr_idx] -= km[d] * km[e];
00194         ++curr_idx;
00195       }
00196     }
00197 
00198     // Update mean and covariance matrix on this kernel
00199     kernel[k].SetMean(km);
00200     kernel[k].SetSigma(ks);
00201   }
00202 
00203   // normalize the mixing weight (so they sum to 1)
00204   // and store them into the the GMM model
00205   float sum = 0.0;
00206 
00207   for (int k = 0; k < nk; k++)
00208     sum += weight[k];
00209   for (int k = 0; k < nk; k++) {
00210     // move the mixing weight in the right direction
00211     //w[k] = 0.1 * w[k] + 0.9 * (weight[k] / sum);
00212     w[k] = weight[k] / sum;
00213   }
00214 
00215   // compute the log likelihood - eq (9.28) (parallelizable)
00216   double loglikelihood = 0.0;
00217   for (unsigned int i = 0; i < pts.size(); i++) {
00218     likelihood = probability(pts[i]);
00219     loglikelihood += log(likelihood);
00220   }
00221   loglikelihood /= pts.size();
00222 
00223   for (int i = 0; i < nk; i++)
00224     delete[] prob[i];
00225 
00226   delete[] prob;
00227   delete[] px;
00228 
00229   return loglikelihood;
00230 }
00231 
00232 
00233 double GMM::GmmEm(const std::vector<GMMFloatPnt>& pts) {
00234   double normloglikelihood = 0.0;
00235   double new_normloglikelihood = -std::numeric_limits<double>::max();
00236   double diff;
00237   int iteration = 0;
00238 
00239   std::cout << "EM diff threshold: " << em_thresh << std::endl;
00240   do {
00241     iteration++;
00242 
00243     // transfer old log likelihood value
00244     normloglikelihood = new_normloglikelihood;
00245 
00246     // EM step for GMM
00247     new_normloglikelihood = this->learn(pts);
00248     if (iteration == 1)
00249       diff = std::numeric_limits<double>::quiet_NaN();
00250     else
00251       diff = new_normloglikelihood - normloglikelihood;
00252 
00253     std::cout << "Log likelihood: iter " <<
00254         boost::format("%3d : %+8.4f | %+8.4f") % iteration %
00255         new_normloglikelihood % diff << "\n";
00256 
00257     // stop EM iterations only if difference between this and the last
00258     // iteration is less than a threshold, or the number of iterations
00259     // has increased a certain number (but run EM for atleast a certain
00260     // number of iterations)
00261   } while(iteration <= min_iter ||
00262       (diff > em_thresh && iteration < max_iter));
00263 
00264   if (iteration >= max_iter && new_normloglikelihood - normloglikelihood > em_thresh) {
00265     std::cout << "EM failed to converge after " << iteration << " iterations : "
00266               << new_normloglikelihood;
00267   }
00268 
00269   return new_normloglikelihood;
00270 }
00271 
00272 
00273 void GMM::kmeansInit(const std::vector<GMMFloatPnt>& pts, const float sigma) {
00274   // initialize the kernels by running kmeans
00275 
00276   GMMFloatPnt temp_pnt;
00277 
00278   const int NUM_RETRIES = 3;
00279   const int MAX_ITERS = 100;
00280   const float DIST_THRESH = 0.001;
00281 
00282   cv::Mat bestlabels;
00283   cv::Mat clustercenters;
00284 
00285   cv::Mat rgbmat(pts.size(), 1, CV_32FC(NUM_GMM_DIMS));
00286 
00287   // convert vector to a CV_32FC3 Mat
00288   for (int i = 0; i < rgbmat.rows; i++) {
00289     for (int d = 0; d < NUM_GMM_DIMS; d++)
00290       temp_pnt[d] = pts[i][d];
00291 
00292     rgbmat.at<GMMFloatPnt>(i) = temp_pnt;
00293 
00294     /*
00295     for (int j=0; j < NUM_GMM_DIMS; j++)
00296       std::cout << (float)pts[i][j] << ",";
00297     std::cout << std::endl;
00298     */
00299   }
00300   // std::cout << "Number of rows: " << rgbmat.rows << std::endl;
00301   // std::cout << "Number of cols: " << rgbmat.cols << std::endl;
00302   // std::cout << "nk: " << nk << std::endl;
00303 
00304   // if the number of clusters needed is more than the number of points
00305   if (rgbmat.rows < nk || nk == 0) {
00306     // reduce the number of kernels (reallocate memory)
00307     alloc(rgbmat.rows);
00308     std::cout << "Reduced the number of kernels to " << nk << " because not enough data points available" << std::endl;
00309   }
00310 
00311   // TODO: expose epsilon to user
00312   cv::TermCriteria termcrit(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, MAX_ITERS, 0.0001);
00313 
00314   // run kmeans
00315   kmeans(rgbmat, nk, bestlabels, termcrit, NUM_RETRIES, cv::KMEANS_RANDOM_CENTERS, clustercenters);
00316 
00317   // discard clusters whose centers are a repeat
00318   float dist;
00319 
00320   // // compare all cluster pairs to see if any two clusters are too close to each other
00321   // for (unsigned int i = 1; i < clustercenters.rows; i++) {
00322   //   //std::cout << "Cluster centers: " << clustercenters.rows << std::endl;
00323 
00324   //   for (unsigned int j = 0; j < i; j++) {
00325   //     // check if cluster centers are the same
00326 
00327   //     // compute L2 distance
00328   //     dist = 0.0;
00329   //     for (int d = 0; d < NUM_GMM_DIMS; d++)
00330   //       dist += pow(clustercenters.at<float>(i,d) - clustercenters.at<float>(j,d), 2);
00331   //     dist = sqrt(dist);
00332   //     //std::cout << i << "," << j << " dist: " << dist << std::endl;
00333 
00334   //     // if the two cluster centers are too close to each other
00335   //     if (dist <= DIST_THRESH) {
00336   //       // discard cluster i
00337 
00338   //       // (1) replace all pixels labelled i with label j
00339   //       for (unsigned int k=0; k < bestlabels.rows; k++) {
00340   //         if (bestlabels.at<unsigned int>(k) == i)
00341   //           bestlabels.at<unsigned int>(k) = j;
00342   //       }
00343 
00344   //       // NOTE: Could also be adjusting the mean here of cluster j, but since
00345   //       //  DIST_THRESH we would suppose it doesn't change by much
00346 
00347   //       // (2) shift cluster centers one down the array following i
00348   //       for (unsigned int k=i; k < clustercenters.rows-1; k++) {
00349   //         memcpy(clustercenters.data + clustercenters.step[0]*k,
00350   //                clustercenters.data + clustercenters.step[0]*(k+1),
00351   //                clustercenters.step[0]);
00352   //       }
00353 
00354   //       // (3) shrink size of cluster center array by 1 row
00355   //       clustercenters = clustercenters.rowRange(0, clustercenters.rows-1);
00356   //       i--;
00357 
00358   //       break;
00359   //     }
00360   //   }
00361   // }
00362 
00363   // if (clustercenters.rows < nk) {
00364   //   std::cout << "The number of clusters were reduced from " << nk << " to "
00365   //             << clustercenters.rows << std::endl;
00366 
00367   //   // reduce the number of kernels (reallocate memory)
00368   //   alloc(clustercenters.rows);
00369   // }
00370 
00371   // transfer the cluster means to the kernels. Plus the Gaussians have a fixed
00372   // variance in each direction and no covariance
00373   for (int i = 0; i < clustercenters.rows; i++) {
00374     for (int d = 0; d < NUM_GMM_DIMS; d++)
00375       temp_pnt[d] = clustercenters.at<float>(i,d);
00376 
00377     kernel[i].SetMean(temp_pnt);
00378 
00379     kernel[i].SetSigma(Gaussian::returnFixedSigma(sigma));
00380   }
00381 }
00382 
00383 
00384 void GMM::initkernels(const std::vector<GMMFloatPnt>& pts, const float sigma) {
00385   // initilize kernels by using ramdom sampling within outer boundary
00386   GMMFloatPnt xmin, xmax, temp_mean;
00387 
00388   for (int d = 0; d < NUM_GMM_DIMS; d++)
00389     xmin[d] = xmax[d] = pts[0][d];
00390 
00391   // find the max and min R/G/B values
00392   for (unsigned int i = 0; i < pts.size(); i++) {
00393     for (int d = 0; d < NUM_GMM_DIMS; d++) {
00394       if (pts[i][d] < xmin[d])
00395         xmin[d] = pts[i][d];
00396       if (pts[i][d] > xmax[d])
00397         xmax[d] = pts[i][d];
00398     }
00399   }
00400 
00401   time_t ti;
00402 
00403   time(&ti);
00404   srand(ti);
00405 
00406   // randomly initialize all kernels with means randomly distributed around
00407   // the range of R/G/B values. Plus the Gaussians have a fixed variance in
00408   // each direction and no covariance
00409   for (int i = 0; i < nk; i++) {
00410     for (int d = 0; d < NUM_GMM_DIMS; d++)
00411       temp_mean[d] = xmin[d] + (float)((xmax[d] - xmin[d]) * rand()) / (float) RAND_MAX;
00412 
00413     kernel[i].SetMean(temp_mean);
00414     kernel[i].SetSigma(Gaussian::returnFixedSigma(sigma));
00415   }
00416 }
00417 
00418 
00419 void GMM::savegmm(const fs::path& savefilepath) const {
00420   std::ofstream fd_gmm;
00421 
00422   fd_gmm.open(savefilepath.string().c_str(), std::ofstream::binary | std::ofstream::trunc);
00423 
00424   fd_gmm.write(reinterpret_cast<const char *>(&nk), sizeof(int));
00425   fd_gmm.write(reinterpret_cast<const char *>(&em_thresh), sizeof(double));
00426   fd_gmm.write(reinterpret_cast<const char *>(&max_iter), sizeof(double));
00427   fd_gmm.write(reinterpret_cast<const char *>(&min_iter), sizeof(double));
00428   fd_gmm.write(reinterpret_cast<const char *>(w), sizeof(float)*nk);
00429 
00430   // serialize all kernels
00431   for (int i = 0; i < nk; i++) {
00432     kernel[i].serialize(fd_gmm);
00433   }
00434 
00435   fd_gmm.close();
00436 }
00437 
00438 
00439 void GMM::loadgmm(const fs::path& loadfilepath) {
00440   std::ifstream fd_gmm;
00441 
00442   fd_gmm.open(loadfilepath.string().c_str(), std::ofstream::binary);
00443 
00444   // clear all data before proceeding
00445   free();
00446 
00447   // de-serialize all variables
00448   int num_kernels;
00449   fd_gmm.read(reinterpret_cast<char *>(&num_kernels), sizeof(int));
00450   fd_gmm.read(reinterpret_cast<char *>(&em_thresh), sizeof(double));
00451   fd_gmm.read(reinterpret_cast<char *>(&max_iter), sizeof(double));
00452   fd_gmm.read(reinterpret_cast<char *>(&min_iter), sizeof(double));
00453 
00454   alloc(num_kernels);
00455 
00456   fd_gmm.read(reinterpret_cast<char *>(w), sizeof(float)*nk);
00457 
00458   // de-serialize all kernels
00459   for (int i = 0; i < nk; i++) {
00460     kernel[i].deserialize(fd_gmm);
00461   }
00462 
00463   fd_gmm.close();
00464 }