cpl_visual_features: shape_context.cpp Source File

Go to the documentation of this file.
00001 #include <cpl_visual_features/features/shape_context.h>
00002 #include <cpl_visual_features/extern/lap_cpp/lap.h>
00003 #include <cpl_visual_features/helpers.h>
00004 #include <math.h>
00005 #include <string>
00006 #include <iostream>
00007 
00008 namespace cpl_visual_features
00009 {
00010 
00011 void swapImages(cv::Mat& a, cv::Mat& b)
00012 {
00013   cv::Mat c = a;
00014   a = b;
00015   b = c;
00016 }
00017 
00018 void swapSamples(Samples& a, Samples& b)
00019 {
00020   Samples c = a;
00021   a = b;
00022   b = c;
00023 }
00024 
00025 double compareShapes(cv::Mat& imageA, cv::Mat& imageB, double epsilonCost, bool write_images,
00026                      std::string filePath, int max_displacement, std::string filePostFix)
00027 {
00028   cv::Mat edge_imageA(imageA.size(), imageA.type());
00029   cv::Mat edge_imageB(imageB.size(), imageB.type());
00030   cv::Mat edge_imageA_raw;
00031   cv::Mat edge_imageB_raw;
00032 
00033   // do edge detection
00034   cv::Canny(imageA, edge_imageA, 0.05, 0.5);
00035   cv::Canny(imageB, edge_imageB, 0.05, 0.5);
00036   edge_imageA.copyTo(edge_imageA_raw);
00037   edge_imageB.copyTo(edge_imageB_raw);
00038 
00039   // sample a subset of the edge pixels
00040   // std::cout << "Getting samples points on A" << std::endl;
00041   Samples samplesA = samplePoints(edge_imageA);
00042   // std::cout << "Getting samples points on B" << std::endl;
00043   Samples samplesB = samplePoints(edge_imageB);
00044   // std::cout << "samplesA.size() " << samplesA.size() << std::endl;
00045   // std::cout << "samplesB.size() " << samplesB.size() << std::endl;
00046 
00047   // Swap images if B is shorter than A;
00048   if (samplesA.size() > samplesB.size())
00049   {
00050     swapImages(imageA, imageB);
00051     swapImages(edge_imageA, edge_imageB);
00052     swapImages(edge_imageA_raw, edge_imageB_raw);
00053     swapSamples(samplesA, samplesB);
00054   }
00055 
00056   // construct shape descriptors for each sample
00057   // std::cout << "Getting descriptors for samples A" << std::endl;
00058   ShapeDescriptors descriptorsA = constructDescriptors<Samples>(samplesA);
00059   // std::cout << "Getting descriptors for samples B" << std::endl;
00060   ShapeDescriptors descriptorsB = constructDescriptors<Samples>(samplesB);
00061   // for (int i = 0; i < descriptorsA.size(); ++i)
00062   // {
00063   //   // std::cout << "Filled bins are: [";
00064   //   for (int j = 0; j < descriptorsA[i].size(); ++j)
00065   //   {
00066   //     if (descriptorsA[i][j] > 0)
00067   //     {
00068   //       std::cout << " " << j;
00069   //     }
00070   //   }
00071   //   std::cout << "]" << std::endl;
00072   // }
00073   // for (int i = 0; i < descriptorsB.size(); ++i)
00074   // {
00075   //   std::cout << "Filled bins are: [";
00076   //   for (int j = 0; j < descriptorsB[i].size(); ++j)
00077   //   {
00078   //     if (descriptorsA[i][j] > 0)
00079   //     {
00080   //       std::cout << " " << j;
00081   //     }
00082   //   }
00083   //   std::cout << "]" << std::endl;
00084   // }
00085   // std::cout << "Computing cost matrix" << std::endl;
00086   cv::Mat cost_matrix = computeCostMatrix(descriptorsA, descriptorsB,
00087                                           epsilonCost, write_images, filePath, filePostFix);
00088   // std::cout << "Computed cost matrix" << std::endl;
00089   // save the result
00090   if (write_images)
00091   {
00092     std::stringstream image_a_path_raw;
00093     image_a_path_raw << filePath << "/edge_imageA_raw" << filePostFix << ".bmp";
00094     std::stringstream image_b_path_raw;
00095     image_b_path_raw << filePath << "/edge_imageB_raw" << filePostFix << ".bmp";
00096     cv::imwrite(image_a_path_raw.str().c_str(), edge_imageA_raw);
00097     cv::imwrite(image_b_path_raw.str().c_str(), edge_imageB_raw);
00098     std::stringstream image_a_path;
00099     image_a_path << filePath << "/edge_imageA" << filePostFix << ".bmp";
00100     std::stringstream image_b_path;
00101     image_b_path << filePath << "/edge_imageB" << filePostFix << ".bmp";
00102     cv::imwrite(image_a_path.str().c_str(), edge_imageA);
00103     cv::imwrite(image_b_path.str().c_str(), edge_imageB);
00104   }
00105 
00106   // do bipartite graph matching to find point correspondences
00107   // (uses code from http://www.magiclogic.com/assignment.html)
00108   Path min_path;
00109   double score = getMinimumCostPath(cost_matrix, min_path);
00110   displayMatch(edge_imageA, edge_imageB, samplesA, samplesB, min_path, max_displacement,
00111                filePath, filePostFix);
00112   int sizeA = samplesA.size();
00113   int sizeB = samplesB.size();
00114 
00115   // TODO: Return correspondences as well
00116   return (score-(fabs(sizeA-sizeB)*epsilonCost));
00117 }
00118 
00119 ShapeDescriptors extractDescriptors(cv::Mat& image)
00120 {
00121   cv::Mat edge_image(image.size(), image.type());
00122   cv::Mat edge_image_raw;
00123 
00124   // do edge detection
00125   cv::Canny(image, edge_image, 0.05, 0.5);
00126   edge_image.copyTo(edge_image_raw);
00127 
00128   // sample a subset of the edge pixels
00129   Samples samples = samplePoints(edge_image);
00130 
00131   // construct shape descriptors for each sample
00132   ShapeDescriptors descriptors = constructDescriptors<Samples>(samples);
00133   return descriptors;
00134 }
00135 
00136 Samples samplePoints(cv::Mat& edge_image, double percentage)
00137 {
00138   Samples samples;
00139   Samples all_points;
00140   cv::Scalar pixel;
00141   for (int y=0; y < edge_image.rows; y++)
00142   {
00143     for (int x=0; x < edge_image.cols; x++)
00144     {
00145       if (edge_image.at<uchar>(y, x) > 0)
00146       {
00147         all_points.push_back(cv::Point(x, y));
00148       }
00149     }
00150   }
00151 
00152   // set edge image to black
00153   edge_image = cv::Scalar(0);
00154 
00155   // subsample a percentage of all points
00156   int scale = 1 / percentage;
00157   for (unsigned int i=0; i < all_points.size(); i++)
00158   {
00159     if (i%scale == 0)
00160     {
00161       samples.push_back(all_points.at(i));
00162       edge_image.at<uchar>(samples.back().y, samples.back().x) = 255;
00163     }
00164   }
00165   return samples;
00166 }
00167 
00168 int getHistogramIndex(double radius, double theta, int radius_bins, int theta_bins)
00169 {
00170   if (theta > 1.0) std::cout << "theta is too big: " << theta << std::endl;
00171   if (theta < 0.0) std::cout << "theta is too big: " << theta << std::endl;
00172   int radius_idx = std::max(std::min((int)std::floor(radius*radius_bins), radius_bins-1), 0);
00173   int theta_idx = std::max(std::min((int)std::floor(theta*theta_bins), theta_bins-1),0);
00174   // std::cout << "theta_idx: " << theta_idx << std::endl;
00175   int idx = theta_idx*radius_bins+radius_idx;
00176   return idx;
00177 }
00178 
00179 ShapeDescriptors constructDescriptors(Samples2f& samples,
00180                                       cv::Point2f& center,
00181                                       bool use_center,
00182                                       int radius_bins,
00183                                       int theta_bins,
00184                                       double max_radius,
00185                                       double scale)
00186 {
00187   ShapeDescriptors descriptors;
00188   ShapeDescriptor descriptor;
00189   // double max_radius = 0;
00190   double radius, theta;
00191   double x1, x2, y1, y2;
00192   unsigned int i, j, k, m;
00193 
00194   // find maximum radius for normalization purposes (unless passed in as argument)
00195   if (max_radius == 0.0)
00196   {
00197     for (i=0; i < samples.size(); i++)
00198     {
00199       x1 = samples.at(i).x;
00200       y1 = samples.at(i).y;
00201       for (k=0; k < samples.size(); k++)
00202       {
00203         if (k != i)
00204         {
00205           x2 = samples.at(k).x;
00206           y2 = samples.at(k).y;
00207 
00208           radius = sqrt(pow(x1-x2,2) + pow(y1-y2,2))*scale;
00209           if (radius > max_radius)
00210           {
00211             max_radius = radius;
00212           }
00213         }
00214       }
00215     }
00216   }
00217   else
00218   {
00219     max_radius *= scale;
00220   }
00221   // std::cout << "max_radius is: " << max_radius << std::endl;
00222   max_radius = log(max_radius);
00223   // std::cout << "log(max_radius) is: " << max_radius << std::endl;
00224 
00225   // build a descriptor for each sample
00226   for (i=0; i < samples.size(); i++)
00227   {
00228     // initialize descriptor
00229     descriptor.clear();
00230     for (j=0; j < radius_bins*theta_bins; j++)
00231     {
00232       descriptor.push_back(0);
00233     }
00234 
00235     // Orient descriptor towards center
00236     double center_angle = atan2(samples[i].y - center.y, samples[i].x- center.x);
00237     x1 = samples.at(i).x;
00238     y1 = samples.at(i).y;
00239 
00240     // construct descriptor
00241     for (m=0; m < samples.size(); m++)
00242     {
00243       if (m != i)
00244       {
00245         x2 = samples.at(m).x;
00246         y2 = samples.at(m).y;
00247         radius = sqrt(pow(x1-x2,2) + pow(y1-y2,2))*scale;
00248         radius = log(radius);
00249         radius /= max_radius;
00250         theta = atan2(y1-y2,x1-x2);
00251         // std::cout << "Theta raw: " << theta << std::endl;
00252         if (use_center)
00253         {
00254           // Rotate theta so that center orientation is 0
00255           theta = cpl_visual_features::subPIAngle(theta-center_angle);
00256           // std::cout << "Theta shifted: " << theta << std::endl;
00257         }
00258         // Get in range [0,2pi]
00259         theta += M_PI;
00260         // std::cout << "Theta [0,2pi]: " << theta << std::endl;
00261         // Get theta in range [0,1]
00262         theta /= 2*M_PI;
00263         // std::cout << "Theta [0,1]: " << theta << std::endl;
00264         int idx = getHistogramIndex(radius, theta, radius_bins, theta_bins);
00265         // std::cout << "idx: " << idx << std::endl;
00266         descriptor[idx]++;
00267       }
00268     }
00269 
00270     // add descriptor to std::vector of descriptors
00271     descriptors.push_back(descriptor);
00272   }
00273 
00274   return descriptors;
00275 }
00276 
00277 // template <class sample_type> ShapeDescriptors constructDescriptors(sample_type& samples,
00278 //                                                                    unsigned int radius_bins,
00279 //                                                                    unsigned int theta_bins)
00280 // {
00281 //   ShapeDescriptors descriptors;
00282 //   ShapeDescriptor descriptor;
00283 //   double max_radius = 0;
00284 //   double radius, theta;
00285 //   double x1, x2, y1, y2;
00286 //   unsigned int i, j, k, m;
00287 
00288 //   // find maximum radius for normalization purposes
00289 //   for (i=0; i < samples.size(); i++)
00290 //   {
00291 //     for (k=0; k < samples.size(); k++)
00292 //     {
00293 //       if (k != i)
00294 //       {
00295 //         x1 = samples.at(i).x;
00296 //         y1 = samples.at(i).y;
00297 //         x2 = samples.at(k).x;
00298 //         y2 = samples.at(k).y;
00299 
00300 //         radius = sqrt(pow(x1-x2,2) + pow(y1-y2,2));
00301 //         if (radius > max_radius)
00302 //         {
00303 //           max_radius = radius;
00304 //         }
00305 //       }
00306 //     }
00307 //   }
00308 //   max_radius = log(max_radius);
00309 //   // std::cout << "Got max_radius of: " << max_radius << std::endl;
00310 
00311 //   // build a descriptor for each sample
00312 //   for (i=0; i < samples.size(); i++)
00313 //   {
00314 //     // initialize descriptor
00315 //     descriptor.clear();
00316 //     for (j=0; j < radius_bins*theta_bins; j++)
00317 //     {
00318 //       descriptor.push_back(0);
00319 //     }
00320 //     x1 = samples.at(i).x;
00321 //     y1 = samples.at(i).y;
00322 
00323 //     // construct descriptor
00324 //     for (m=0; m < samples.size(); m++)
00325 //     {
00326 //       if (m != i)
00327 //       {
00328 //         // std::cout << "Constructing descriptor for (" << i << ", " << m << ")" << std::endl;
00329 //         x2 = samples.at(m).x;
00330 //         y2 = samples.at(m).y;
00331 
00332 //         radius = sqrt(pow(x1-x2,2) + pow(y1-y2,2));
00333 //         radius = log(radius);
00334 //         radius /= max_radius;
00335 //         theta = atan(fabs(y1-y2) / fabs(x1-x2));
00336 //         theta += M_PI;
00337 //         if (y1-y2 < 0)
00338 //         {
00339 //           theta += M_PI;
00340 //         }
00341 //         theta /= 2*M_PI;
00342 //         // std::cout << "Getting idx for (" << radius << ", " << theta << ")" << std::endl;
00343 //         int idx = getHistogramIndex(radius, theta, radius_bins, theta_bins);
00344 //         // std::cout << "Idx is: " << idx << std::endl;
00345 //         descriptor.at(idx)++;
00346 //       }
00347 //     }
00348 
00349 //     // add descriptor to std::vector of descriptors
00350 //     descriptors.push_back(descriptor);
00351 //   }
00352 
00353 //   return descriptors;
00354 // }
00355 
00356 cv::Mat computeCostMatrix(ShapeDescriptors& descriptorsA,
00357                           ShapeDescriptors& descriptorsB,
00358                           double epsilonCost,
00359                           bool write_images,
00360                           std::string filePath, std::string filePostFix)
00361 {
00362   int mat_size = std::max(descriptorsA.size(), descriptorsB.size());
00363   cv::Mat cost_matrix(mat_size, mat_size, CV_64FC1, 0.0f);
00364   double d_cost, hi, hj;
00365   ShapeDescriptor& descriptorA = descriptorsA.front();
00366   ShapeDescriptor& descriptorB = descriptorsB.front();
00367 
00368   // initialize cost matrix for dummy values
00369   for (int i=0; i < cost_matrix.rows; i++)
00370   {
00371     for (int j=0; j < cost_matrix.cols; j++)
00372     {
00373       cost_matrix.at<double>(i,j) = epsilonCost;
00374     }
00375   }
00376 
00377   for (unsigned int i=0; i < descriptorsA.size(); i++)
00378   {
00379     descriptorA = descriptorsA.at(i);
00380     for (unsigned int j=0; j < descriptorsB.size(); j++)
00381     {
00382       descriptorB = descriptorsB.at(j);
00383       d_cost = 0;
00384 
00385       // compute cost between shape context i and j
00386       // using chi-squared test statistic
00387       for (unsigned int k=0; k < descriptorA.size(); k++)
00388       {
00389         hi = descriptorA.at(k) / (descriptorsA.size() - 1); // normalized bin val
00390         hj = descriptorB.at(k) / (descriptorsB.size() - 1); // normalized bin val
00391         if (hi + hj > 0)
00392         {
00393           d_cost += pow(hi-hj, 2) / (hi + hj);
00394         }
00395       }
00396       d_cost /= 2;
00397       cost_matrix.at<double>(i,j) = d_cost;
00398     }
00399   }
00400   if (write_images)
00401   {
00402     cv::Mat int_cost_matrix;
00403     cost_matrix.convertTo(int_cost_matrix, CV_8UC1, 255);
00404     std::stringstream cost_matrix_path;
00405     cost_matrix_path << filePath << "/cost_matrix" << filePostFix << ".bmp";
00406     cv::imwrite(cost_matrix_path.str().c_str(), int_cost_matrix);
00407   }
00408   // double min_val, max_val;
00409   // cv::minMaxLoc(cost_matrix, &min_val, &max_val);
00410   // std::cout << "Cost range: [" << min_val << ", " << max_val << "]" << std::endl;
00411   return cost_matrix;
00412 }
00413 
00414 double getMinimumCostPath(cv::Mat& cost_matrix, Path& path)
00415 {
00416   const int dim = cost_matrix.rows;
00417   LapCost **cost_mat;
00418   cost_mat = new LapCost*[dim];
00419   // std::cout << "Allocating cost matrix of size " << dim << std::endl;
00420   for (int r = 0; r < dim; ++r)
00421   {
00422     cost_mat[r] = new LapCost[dim];
00423   }
00424   // std::cout << "Populating cost matrix" << std::endl;
00425   for (int r = 0; r < dim; ++r)
00426   {
00427     for (int c = 0; c < dim; ++c)
00428     {
00429       cost_mat[r][c] = (float)cost_matrix.at<double>(r,c);
00430     }
00431   }
00432   LapRow* rowsol;
00433   LapCol* colsol;
00434   LapCost* u;
00435   LapCost* v;
00436   rowsol = new LapCol[dim];
00437   colsol = new LapRow[dim];
00438   u = new LapCost[dim];
00439   v = new LapCost[dim];
00440   // std::cout << "Running lap" << std::endl;
00441   LapCost match_cost = lap(dim, cost_mat, rowsol, colsol, u, v);
00442   // std::cout << "Ran lap" << std::endl;
00443   for (int r = 0; r < dim; ++r)
00444   {
00445     int c = rowsol[r];
00446     path.push_back(c);
00447   }
00448   // std::cout << "Converted lap result" << std::endl;
00449   for (int r = 0; r < dim; ++r)
00450   {
00451     delete cost_mat[r];
00452   }
00453   delete cost_mat;
00454   delete u;
00455   delete v;
00456   delete rowsol;
00457   delete colsol;
00458   return match_cost;
00459 }
00460 
00461 void displayMatch(cv::Mat& edge_imageA, cv::Mat& edge_imageB,
00462                   Samples& samplesA, Samples& samplesB,
00463                   Path& path, int max_displacement, std::string filePath,
00464                   std::string filePostFix)
00465 {
00466   int a_row_offset = 0;
00467   int b_row_offset = 0;
00468   int a_col_offset = 0;
00469   int b_col_offset = 0;
00470   if (edge_imageA.rows > edge_imageB.rows)
00471   {
00472     b_row_offset = (edge_imageA.rows - edge_imageB.rows)/2;
00473   }
00474   else if (edge_imageA.rows < edge_imageB.rows)
00475   {
00476     a_row_offset = (edge_imageB.rows - edge_imageA.rows)/2;
00477   }
00478   if (edge_imageA.cols > edge_imageB.cols)
00479   {
00480     b_col_offset = (edge_imageA.cols - edge_imageB.cols)/2;
00481   }
00482   else if (edge_imageA.cols < edge_imageB.cols)
00483   {
00484     a_col_offset = (edge_imageB.cols - edge_imageA.cols)/2;
00485   }
00486   cv::Mat disp_img(std::max(edge_imageA.rows, edge_imageB.rows),
00487                    std::max(edge_imageA.cols, edge_imageB.cols), CV_8UC3, cv::Scalar(0,0,0));
00488   // Display image A
00489   for (int r=0; r < edge_imageA.rows; r++)
00490   {
00491     for (int c=0; c < edge_imageA.cols; c++)
00492     {
00493       if (edge_imageA.at<uchar>(r,c) > 0)
00494         disp_img.at<cv::Vec3b>(r+a_row_offset, c+a_col_offset) = cv::Vec3b(255,0,0);
00495     }
00496   }
00497 
00498   // Display image B as another color
00499   for (int r=0; r < edge_imageB.rows; r++)
00500   {
00501     for (int c=0; c < edge_imageB.cols; c++)
00502     {
00503       if (edge_imageB.at<uchar>(r,c) > 0)
00504         disp_img.at<cv::Vec3b>(r+b_row_offset, c+b_col_offset) = cv::Vec3b(0,0,255);
00505     }
00506   }
00507 
00508   for (unsigned int i = 0; i < samplesA.size(); ++i)
00509   {
00510     cv::Point start_point = samplesA[i];
00511     start_point.x += a_col_offset;
00512     start_point.y += a_row_offset;
00513     cv::Point end_point = samplesB[path[i]];
00514     end_point.x += b_col_offset;
00515     end_point.y += b_row_offset;
00516     if (std::abs(start_point.x - end_point.x) +
00517         std::abs(start_point.y - end_point.y) < max_displacement &&
00518         end_point.x > 0 && end_point.x < disp_img.rows &&
00519         end_point.y > 0 && end_point.x < disp_img.cols)
00520     {
00521       cv::line(disp_img, start_point, end_point, cv::Scalar(0,255,0));
00522     }
00523   }
00524   std::stringstream match_path;
00525   match_path << filePath << "/matches" << filePostFix << ".bmp";
00526   cv::imwrite(match_path.str().c_str(), disp_img);
00527   cv::imshow("match", disp_img);
00528   cv::imshow("edgesA", edge_imageA);
00529   cv::imshow("edgesB", edge_imageB);
00530   cv::waitKey();
00531 }
00532 };