cpl_visual_features: lm_filter_bank.cpp Source File

Go to the documentation of this file.
00001 /*********************************************************************
00002  * Software License Agreement (BSD License)
00003  *
00004  *  Copyright (c) 2010, Georgia Institute of Technology
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions
00009  *  are met:
00010  *
00011  *   * Redistributions of source code must retain the above copyright
00012  *     notice, this list of conditions and the following disclaimer.
00013  *   * Redistributions in binary form must reproduce the above
00014  *     copyright notice, this list of conditions and the following
00015  *     disclaimer in the documentation and/or other materials provided
00016  *     with the distribution.
00017  *   * Neither the name of the Georgia Institute of Technology nor the names of
00018  *     its contributors may be used to endorse or promote products derived
00019  *     from this software without specific prior written permission.
00020  *
00021  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00024  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00025  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00026  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00027  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00028  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00029  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00030  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00031  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032  *  POSSIBILITY OF SUCH DAMAGE.
00033  *********************************************************************/
00034 
00035 #include <cpl_visual_features/features/lm_filter_bank.h>
00036 #include <opencv2/ml/ml.hpp>
00037 #include <sstream>
00038 #include <fstream>
00039 #include <algorithm>
00040 
00041 using cv::Mat;
00042 using std::vector;
00043 
00044 namespace cpl_visual_features
00045 {
00046 
00047 //
00048 // Constructors and Initialization
00049 //
00050 LMFilterBank::LMFilterBank(int orientations, int elongated_scales,
00051                            int gaussian_scales, int extract_scales,
00052                            int support) :
00053     n_orientations_(orientations), n_e_scales_(elongated_scales),
00054     n_g_scales_(gaussian_scales), n_extract_scales_(extract_scales),
00055     support_(support), n_filters_(0)
00056 {
00057   generateFilters();
00058 }
00059 
00060 void LMFilterBank::generateFilters(void)
00061 {
00062   // Get the standard deviations to use
00063   vector<float> kernel_sds;
00064   for (int i = 1; i <= std::max(n_e_scales_, n_g_scales_); ++i)
00065   {
00066     float scale_sigma = pow(sqrt(2.0),i);
00067     kernel_sds.push_back(scale_sigma);
00068   }
00069 
00070   // Build the base values for creating the filters
00071   Mat org_pts(2, support_*support_, CV_32FC1);
00072   int h_sup = (support_ - 1) / 2;
00073 
00074   for(int c = 0, v1_count = 0, v1 = -h_sup; c < org_pts.cols; ++c)
00075   {
00076     // Row 1 is -24 49 times, then -23 49 times, ...
00077     org_pts.at<float>(0,c) = v1;
00078     org_pts.at<float>(1,c) = -(c%support_) + h_sup;
00079     v1_count++;
00080     if (v1_count == support_)
00081     {
00082       v1++;
00083       v1_count = 0;
00084     }
00085   }
00086 
00087   // Generate elongated filters
00088   for (int s = 0; s < n_e_scales_; ++s)
00089   {
00090     for (int o = 0; o < n_orientations_; ++o)
00091     {
00092       double theta = M_PI*o/n_orientations_;
00093       double c_theta;
00094       double s_theta;
00095       sincos(theta, &c_theta, &s_theta);
00096       Mat rot_mat(2,2,CV_32FC1);
00097       rot_mat.at<float>(0,0) = c_theta;
00098       rot_mat.at<float>(0,1) = -s_theta;
00099       rot_mat.at<float>(1,0) = s_theta;
00100       rot_mat.at<float>(1,1) = c_theta;
00101       Mat rot_pts = rot_mat*org_pts;
00102       Mat rotpts_1 = rot_pts.row(0);
00103       Mat rotpts_2 = rot_pts.row(1);
00104       Mat bar = createElongatedFilter(kernel_sds[s], 0, 1,
00105                                       rotpts_1, rotpts_2, support_);
00106       Mat edge = createElongatedFilter(kernel_sds[s], 0, 2,
00107                                        rotpts_1, rotpts_2, support_);
00108       bars_.push_back(bar);
00109       edges_.push_back(edge);
00110     }
00111   }
00112 
00113   // Create low-pass gaussian filters and the log filters
00114   for (int s = 0; s < n_g_scales_; ++s)
00115   {
00116     int ksize = support_;
00117     Mat g1 = cv::getGaussianKernel(ksize, kernel_sds[s], CV_32F);
00118     Mat g1_s_n = g1 * g1.t();
00119     gaussians_.push_back(normalize(g1_s_n));
00120 
00121     Mat d1 = getDOGFilter(ksize,     kernel_sds[s]);
00122     Mat d2 = getDOGFilter(ksize, 3.0*kernel_sds[s]);
00123 
00124     dogs_.push_back(normalize(d1));
00125     dogs_.push_back(normalize(d2));
00126   }
00127 
00128   n_filters_ = bars_.size() + edges_.size() + gaussians_.size() + dogs_.size();
00129 }
00130 
00131 //
00132 // Core Functions
00133 //
00134 vector<TextonFeature> LMFilterBank::extractRawFeature(Mat& frame,
00135                                                       bool use_all_pixels)
00136 {
00137   Mat  bw_frame(frame.rows, frame.cols,  CV_8UC1);
00138   Mat base_frame(frame.rows, frame.cols, CV_32FC1);
00139   if (frame.channels() == 3)
00140   {
00141     cvtColor(frame, bw_frame, CV_BGR2GRAY);
00142   }
00143   else
00144   {
00145     bw_frame = frame;
00146   }
00147   bw_frame.convertTo(base_frame, base_frame.type());
00148 
00149   // For most cases we don't need to sample densely...
00150   int row_delta = 4;
00151   int col_delta = 4;
00152   if (use_all_pixels)
00153   {
00154     row_delta = 1;
00155     col_delta = 1;
00156   }
00157 
00158   // Get values based on the step sizes
00159   const int row_elems = frame.rows/row_delta;
00160   const int col_elems = frame.cols/col_delta;
00161 
00162   // Initialize an empty feature vector so that we can simply set values
00163   // directly instead of using push_back
00164   vector<TextonFeature> feats(row_elems*col_elems,
00165                               TextonFeature(n_filters_*n_extract_scales_, 0.0));
00166   vector<Mat> use_frames;
00167   cv::buildPyramid(base_frame, use_frames, n_extract_scales_);
00168   // Counter for all n_filter_*n_extract_scale_ filters
00169   int f_count = 0;
00170 
00171   for (int s = 0; s < n_extract_scales_; ++s)
00172   {
00173     ROS_DEBUG_STREAM("Extracting at scale: " << s);
00174     Mat use_frame = use_frames[s];
00175     // Convolve with the edge filters
00176     for(unsigned int i = 0; i < edges_.size(); ++i, ++f_count)
00177     {
00178       ROS_DEBUG_STREAM("\tExtracting edge filter " << i);
00179       Mat down_convolved;
00180       cv::filter2D(use_frame, down_convolved, -1, edges_[i]);
00181 
00182       Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00183 
00184       for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00185            r += row_delta, ++r_set)
00186       {
00187         for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00188              c += col_delta, ++c_set)
00189         {
00190           feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00191         }
00192       }
00193     }
00194 
00195     // Convolve with the bar filters
00196     for(unsigned int i = 0; i < bars_.size(); ++i, ++f_count)
00197     {
00198       ROS_DEBUG_STREAM("\tExtracting bar filter " << i);
00199       Mat down_convolved;
00200       cv::filter2D(use_frame, down_convolved, -1, bars_[i]);
00201       Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00202       for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00203            r += row_delta, ++r_set)
00204       {
00205         for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00206              c += col_delta, ++c_set)
00207         {
00208           feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00209         }
00210       }
00211     }
00212 
00213     // Conovle with the gaussians
00214     for(unsigned int i = 0; i < gaussians_.size(); ++i, ++f_count)
00215     {
00216       ROS_DEBUG_STREAM("\tExtracting gaussian filter " << i);
00217 
00218       Mat down_convolved;
00219       cv::filter2D(use_frame, down_convolved, -1, gaussians_[i]);
00220       Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00221 
00222       for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00223            r += row_delta, ++r_set)
00224       {
00225         for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00226              c += col_delta, ++c_set)
00227         {
00228           feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00229         }
00230       }
00231     }
00232 
00233     // Convolve with the difference of gaussians
00234     for(unsigned int i = 0; i < dogs_.size(); ++i, ++f_count)
00235     {
00236       ROS_DEBUG_STREAM("\tExtracting DoG filter " << i);
00237       Mat down_convolved;
00238       cv::filter2D(use_frame, down_convolved, -1, dogs_[i]);
00239       Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00240 
00241       for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00242            r += row_delta, ++r_set)
00243       {
00244         for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00245              c += col_delta, ++c_set)
00246         {
00247           feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00248         }
00249       }
00250     }
00251   }
00252   return feats;
00253 }
00254 
00255 vector<int> LMFilterBank::extractDescriptor(Mat& frame)
00256 {
00257   vector<TextonFeature> raw = extractRawFeature(frame);
00258   vector<int> desc(centers_.size(), 0);
00259   for (unsigned int i = 0; i < raw.size(); ++i)
00260   {
00261     int lbl = quantizeFeature(raw[i]);
00262     desc[lbl] += 1;
00263   }
00264   return desc;
00265 }
00266 
00267 Mat LMFilterBank::textonQuantizeImage(Mat& frame)
00268 {
00269   ROS_INFO_STREAM("Extracting raw features");
00270 
00271   vector<TextonFeature> feat = extractRawFeature(frame, true);
00272   ROS_INFO_STREAM("Quantizing pixels");
00273 
00274   Mat quantized(frame.rows, frame.cols, CV_8UC1);
00275   for (int r = 0, i = 0; r < frame.rows; ++r)
00276   {
00277     for (int c = 0; c < frame.cols; ++c, ++i)
00278     {
00279       // For each raw feature in the image find its label
00280       TextonFeature current = feat[i];
00281       int lbl = quantizeFeature(current);
00282       quantized.at<uchar>(r,c) = lbl;
00283     }
00284   }
00285   return quantized;
00286 }
00287 
00288 int LMFilterBank::quantizeFeature(TextonFeature feat)
00289 {
00290   // TODO: use ANN or a kd-tree or something to speed this up
00291   float min_val = FLT_MAX;
00292   int min_idx = -1;
00293   for (unsigned int i = 0; i < centers_.size(); ++i)
00294   {
00295     float dist = featureDist(feat, centers_[i]);
00296     if (dist < min_val)
00297     {
00298       min_val = dist;
00299       min_idx = i;
00300     }
00301   }
00302 
00303   return min_idx;
00304 }
00305 
00306 vector<TextonFeature> LMFilterBank::clusterTextons(
00307     vector<vector<TextonFeature> > samples, int k, int attempts)
00308 {
00309   // Convert the input samples into the correct format of a row per sample.
00310   int num_rows = samples.size()*samples[0].size();
00311   Mat row_samples(num_rows, n_filters_*n_extract_scales_, CV_32FC1);
00312 
00313   ROS_INFO_STREAM("Converting input data type");
00314 
00315   for (unsigned int i = 0, dst_row = 0; i < samples.size(); ++i)
00316   {
00317     for (unsigned int j = 0; j < samples[i].size(); ++j, ++dst_row)
00318     {
00319       for (unsigned int k = 0; k < samples[i][j].size(); ++k)
00320       {
00321         row_samples.at<float>(dst_row, k) = samples[i][j][k];
00322       }
00323     }
00324   }
00325 
00326   // Construct variables needed for running kmeans
00327   Mat labels;
00328   Mat centers(k, n_filters_*n_extract_scales_, CV_32FC1);
00329   float epsilon = 0.01;
00330   int kmeans_max_iter = 100000;
00331 
00332   ROS_INFO_STREAM("Performing kmeans clustering");
00333 
00334   double compactness = cv::kmeans(row_samples, k, labels,
00335                                   cv::TermCriteria(CV_TERMCRIT_EPS +
00336                                                    CV_TERMCRIT_ITER,
00337                                                    kmeans_max_iter, epsilon),
00338                                   attempts, cv::KMEANS_PP_CENTERS, centers);
00339 
00340   ROS_INFO_STREAM("Compactness is: " << compactness);
00341   ROS_INFO_STREAM("Converting center data type");
00342 
00343   vector<TextonFeature> texton_centers;
00344   for (int r = 0; r < centers.rows; ++r)
00345   {
00346     TextonFeature feat;
00347     for (int c = 0; c < centers.cols; ++c)
00348     {
00349       feat.push_back(centers.at<float>(r,c));
00350     }
00351     texton_centers.push_back(feat);
00352   }
00353   return texton_centers;
00354 }
00355 
00356 //
00357 // Helper functions
00358 //
00359 
00360 Mat LMFilterBank::createElongatedFilter(float scale, int phase_x, int phase_y,
00361                                         Mat pts_1, Mat pts_2, int sup)
00362 {
00363   Mat gx = getGaussianDeriv(3.0*scale, 0, pts_1, phase_x);
00364   Mat gy = getGaussianDeriv(scale, 0, pts_2, phase_y);
00365   Mat gxy = gx.mul(gy);
00366   gxy = gxy.reshape(0, sup);
00367   Mat f = normalize(gxy);
00368   return f;
00369 }
00370 
00371 Mat LMFilterBank::getGaussianDeriv(float sigma, float mu, Mat x, int ord)
00372 {
00373   Mat X = x - mu;
00374   Mat numerator = X.mul(X);
00375   float variance = sigma*sigma;
00376   float denominator = 2.0*variance;
00377   Mat to_exp = (numerator*-1.0)/denominator;
00378   Mat g(X.size(), CV_32FC1, 1.0/(M_PI*sqrt(denominator)) );
00379 
00380   for(int r = 0; r < g.rows; ++r)
00381   {
00382     for(int c = 0; c < g.cols; ++c)
00383     {
00384       g.at<float>(r,c) *= std::exp(to_exp.at<float>(r,c));
00385     }
00386   }
00387 
00388   switch(ord)
00389   {
00390     case 1:
00391       g = -1.0*g.mul(x/variance);
00392       break;
00393     case 2:
00394       g = g.mul((numerator - variance) / (variance*variance));
00395       break;
00396     default:
00397       break;
00398   }
00399 
00400   return g;
00401 }
00402 
00403 Mat LMFilterBank::getDOGFilter(int ksize, float sigma, float alpha)
00404 {
00405   Mat kernel(ksize, ksize, CV_32FC1);
00406   Mat g1 = cv::getGaussianKernel(ksize, sigma, CV_32F);
00407   Mat g2 = cv::getGaussianKernel(ksize, alpha*sigma, CV_32F);
00408   Mat G1 = g1*g1.t();
00409   Mat G2 = g2*g2.t();
00410   Mat G3 = G2 - G1;
00411   return G3;
00412 }
00413 
00414 Mat LMFilterBank::upSampleResponse(Mat& m_s, int s, cv::Size size0)
00415 {
00416   // Upsample from scale s to scale 0
00417   Mat m_s_prime;
00418   Mat temp;
00419   m_s.copyTo(m_s_prime);
00420   m_s.copyTo(temp);
00421 
00422   // Calculate the correct sizes to up sample to
00423   vector<cv::Size> sizes;
00424   cv::Size current_size = size0;
00425   for (int i = 0; i < s; i++)
00426   {
00427     sizes.push_back(current_size);
00428     current_size.width /= 2;
00429     current_size.height /= 2;
00430   }
00431 
00432   for (int i = 0; i < s; i++)
00433   {
00434     cv::Size up_size;
00435     up_size = sizes.back();
00436     sizes.pop_back();
00437     cv::pyrUp(temp, m_s_prime, up_size);
00438     temp = m_s_prime;
00439   }
00440 
00441   return m_s_prime;
00442 }
00443 
00444 Mat LMFilterBank::normalize(Mat& f)
00445 {
00446   Mat n(f.size(), CV_32FC1);
00447   n = f - mean(f)[0];
00448   n = n * 1.0/sum(abs(n))[0];
00449   return n;
00450 }
00451 
00452 float LMFilterBank::featureDist(TextonFeature f1, TextonFeature f2)
00453 {
00454   // Add sanity check for feature lengths
00455   ROS_ASSERT_MSG(f1.size() == f2.size(),
00456                  "f1 has size: %d. f2 has size: %d",
00457                  (int)f1.size(), (int)f2.size());
00458   float dist = 0.0;
00459 
00460   for (unsigned int i = 0; i < f1.size(); ++i)
00461   {
00462     dist += std::abs(f1[i]-f2[i]);
00463   }
00464 
00465   return dist;
00466 }
00467 
00468 //
00469 // User IO methods
00470 //
00471 void LMFilterBank::visualizeKernels(void)
00472 {
00473   for(unsigned int i = 0; i < edges_.size(); ++i)
00474   {
00475     double min_val, max_val;
00476     cv::minMaxLoc(edges_[i], &min_val, &max_val);
00477     Mat viewable;
00478     edges_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00479     std::stringstream name;
00480     name << "Edge" << i << std::endl;
00481     cv::imshow(name.str(), viewable);
00482   }
00483   for(unsigned int i = 0; i < bars_.size(); ++i)
00484   {
00485     double min_val, max_val;
00486     cv::minMaxLoc(bars_[i], &min_val, &max_val);
00487     Mat viewable;
00488     bars_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00489     std::stringstream name;
00490     name << "Bar" << i << std::endl;
00491     cv::imshow(name.str(), viewable);
00492 
00493   }
00494   for(unsigned int i = 0; i < gaussians_.size(); ++i)
00495   {
00496     double min_val, max_val;
00497     cv::minMaxLoc(gaussians_[i], &min_val, &max_val);
00498     Mat viewable;
00499     gaussians_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00500     std::stringstream name;
00501     name << "Gaussian" << i << std::endl;
00502     cv::imshow(name.str(), viewable);
00503 
00504   }
00505   for(unsigned int i = 0; i < dogs_.size(); ++i)
00506   {
00507     double min_val, max_val;
00508     cv::minMaxLoc(dogs_[i], &min_val, &max_val);
00509     Mat viewable;
00510     dogs_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00511     std::stringstream name;
00512     name << "DOGs" << i << std::endl;
00513     cv::imshow(name.str(), viewable);
00514   }
00515   cv::waitKey();
00516 }
00517 
00518 void LMFilterBank::saveTextonCenters(vector<TextonFeature> centers,
00519                                      std::string file_name)
00520 {
00521   ROS_INFO_STREAM("Saving texton centers of size " << centers.size()
00522                    << " to " << file_name.c_str());
00523   std::ofstream output(file_name.c_str());
00524   for (unsigned int i = 0; i < centers.size(); ++i)
00525   {
00526     for (unsigned int j = 0; j < centers[i].size(); ++j)
00527     {
00528       output << centers[i][j];
00529       if (j < centers[i].size() - 1)
00530         output << " ";
00531     }
00532     output << std::endl;
00533   }
00534 
00535   output.close();
00536 }
00537 
00538 bool LMFilterBank::readTextonCenters(std::string file_name)
00539 {
00540   std::ifstream input(file_name.c_str());
00541 
00542   if (!input.good())
00543   {
00544     ROS_ERROR_STREAM("Failed to open file" << file_name);
00545     return false;
00546   }
00547 
00548   // Clear the centers currently loaded in the class
00549   centers_.clear();
00550 
00551   char line[1024];
00552 
00553   // Iterate through the lines of the file
00554   while( input.good() )
00555   {
00556     std::stringstream input_line(std::stringstream::in |
00557                                  std::stringstream::out);
00558     input.getline(line, 1024);
00559     input_line << line;
00560 
00561     TextonFeature center;
00562 
00563     // Push back the line elements
00564     for(int i = 0; i < n_filters_*n_extract_scales_ && !input.eof(); ++i)
00565     {
00566       float val;
00567       input_line >> val;
00568       center.push_back(val);
00569     }
00570     centers_.push_back(center);
00571   }
00572   input.close();
00573 
00574   // This was the best way to ensure all codewords are read in, but no empty
00575   // centers are added
00576   if( centers_.back().size() != centers_[0].size() )
00577   {
00578     centers_.pop_back();
00579   }
00580   ROS_INFO_STREAM("Loaded texton codebook of size: " << centers_.size());
00581   return true;
00582 }
00583 
00584 }