00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <cpl_visual_features/features/lm_filter_bank.h>
00036 #include <opencv2/ml/ml.hpp>
00037 #include <sstream>
00038 #include <fstream>
00039 #include <algorithm>
00040
00041 using cv::Mat;
00042 using std::vector;
00043
00044 namespace cpl_visual_features
00045 {
00046
00047
00048
00049
00050 LMFilterBank::LMFilterBank(int orientations, int elongated_scales,
00051 int gaussian_scales, int extract_scales,
00052 int support) :
00053 n_orientations_(orientations), n_e_scales_(elongated_scales),
00054 n_g_scales_(gaussian_scales), n_extract_scales_(extract_scales),
00055 support_(support), n_filters_(0)
00056 {
00057 generateFilters();
00058 }
00059
00060 void LMFilterBank::generateFilters(void)
00061 {
00062
00063 vector<float> kernel_sds;
00064 for (int i = 1; i <= std::max(n_e_scales_, n_g_scales_); ++i)
00065 {
00066 float scale_sigma = pow(sqrt(2.0),i);
00067 kernel_sds.push_back(scale_sigma);
00068 }
00069
00070
00071 Mat org_pts(2, support_*support_, CV_32FC1);
00072 int h_sup = (support_ - 1) / 2;
00073
00074 for(int c = 0, v1_count = 0, v1 = -h_sup; c < org_pts.cols; ++c)
00075 {
00076
00077 org_pts.at<float>(0,c) = v1;
00078 org_pts.at<float>(1,c) = -(c%support_) + h_sup;
00079 v1_count++;
00080 if (v1_count == support_)
00081 {
00082 v1++;
00083 v1_count = 0;
00084 }
00085 }
00086
00087
00088 for (int s = 0; s < n_e_scales_; ++s)
00089 {
00090 for (int o = 0; o < n_orientations_; ++o)
00091 {
00092 double theta = M_PI*o/n_orientations_;
00093 double c_theta;
00094 double s_theta;
00095 sincos(theta, &c_theta, &s_theta);
00096 Mat rot_mat(2,2,CV_32FC1);
00097 rot_mat.at<float>(0,0) = c_theta;
00098 rot_mat.at<float>(0,1) = -s_theta;
00099 rot_mat.at<float>(1,0) = s_theta;
00100 rot_mat.at<float>(1,1) = c_theta;
00101 Mat rot_pts = rot_mat*org_pts;
00102 Mat rotpts_1 = rot_pts.row(0);
00103 Mat rotpts_2 = rot_pts.row(1);
00104 Mat bar = createElongatedFilter(kernel_sds[s], 0, 1,
00105 rotpts_1, rotpts_2, support_);
00106 Mat edge = createElongatedFilter(kernel_sds[s], 0, 2,
00107 rotpts_1, rotpts_2, support_);
00108 bars_.push_back(bar);
00109 edges_.push_back(edge);
00110 }
00111 }
00112
00113
00114 for (int s = 0; s < n_g_scales_; ++s)
00115 {
00116 int ksize = support_;
00117 Mat g1 = cv::getGaussianKernel(ksize, kernel_sds[s], CV_32F);
00118 Mat g1_s_n = g1 * g1.t();
00119 gaussians_.push_back(normalize(g1_s_n));
00120
00121 Mat d1 = getDOGFilter(ksize, kernel_sds[s]);
00122 Mat d2 = getDOGFilter(ksize, 3.0*kernel_sds[s]);
00123
00124 dogs_.push_back(normalize(d1));
00125 dogs_.push_back(normalize(d2));
00126 }
00127
00128 n_filters_ = bars_.size() + edges_.size() + gaussians_.size() + dogs_.size();
00129 }
00130
00131
00132
00133
00134 vector<TextonFeature> LMFilterBank::extractRawFeature(Mat& frame,
00135 bool use_all_pixels)
00136 {
00137 Mat bw_frame(frame.rows, frame.cols, CV_8UC1);
00138 Mat base_frame(frame.rows, frame.cols, CV_32FC1);
00139 if (frame.channels() == 3)
00140 {
00141 cvtColor(frame, bw_frame, CV_BGR2GRAY);
00142 }
00143 else
00144 {
00145 bw_frame = frame;
00146 }
00147 bw_frame.convertTo(base_frame, base_frame.type());
00148
00149
00150 int row_delta = 4;
00151 int col_delta = 4;
00152 if (use_all_pixels)
00153 {
00154 row_delta = 1;
00155 col_delta = 1;
00156 }
00157
00158
00159 const int row_elems = frame.rows/row_delta;
00160 const int col_elems = frame.cols/col_delta;
00161
00162
00163
00164 vector<TextonFeature> feats(row_elems*col_elems,
00165 TextonFeature(n_filters_*n_extract_scales_, 0.0));
00166 vector<Mat> use_frames;
00167 cv::buildPyramid(base_frame, use_frames, n_extract_scales_);
00168
00169 int f_count = 0;
00170
00171 for (int s = 0; s < n_extract_scales_; ++s)
00172 {
00173 ROS_DEBUG_STREAM("Extracting at scale: " << s);
00174 Mat use_frame = use_frames[s];
00175
00176 for(unsigned int i = 0; i < edges_.size(); ++i, ++f_count)
00177 {
00178 ROS_DEBUG_STREAM("\tExtracting edge filter " << i);
00179 Mat down_convolved;
00180 cv::filter2D(use_frame, down_convolved, -1, edges_[i]);
00181
00182 Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00183
00184 for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00185 r += row_delta, ++r_set)
00186 {
00187 for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00188 c += col_delta, ++c_set)
00189 {
00190 feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00191 }
00192 }
00193 }
00194
00195
00196 for(unsigned int i = 0; i < bars_.size(); ++i, ++f_count)
00197 {
00198 ROS_DEBUG_STREAM("\tExtracting bar filter " << i);
00199 Mat down_convolved;
00200 cv::filter2D(use_frame, down_convolved, -1, bars_[i]);
00201 Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00202 for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00203 r += row_delta, ++r_set)
00204 {
00205 for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00206 c += col_delta, ++c_set)
00207 {
00208 feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00209 }
00210 }
00211 }
00212
00213
00214 for(unsigned int i = 0; i < gaussians_.size(); ++i, ++f_count)
00215 {
00216 ROS_DEBUG_STREAM("\tExtracting gaussian filter " << i);
00217
00218 Mat down_convolved;
00219 cv::filter2D(use_frame, down_convolved, -1, gaussians_[i]);
00220 Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00221
00222 for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00223 r += row_delta, ++r_set)
00224 {
00225 for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00226 c += col_delta, ++c_set)
00227 {
00228 feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00229 }
00230 }
00231 }
00232
00233
00234 for(unsigned int i = 0; i < dogs_.size(); ++i, ++f_count)
00235 {
00236 ROS_DEBUG_STREAM("\tExtracting DoG filter " << i);
00237 Mat down_convolved;
00238 cv::filter2D(use_frame, down_convolved, -1, dogs_[i]);
00239 Mat convolved = upSampleResponse(down_convolved, s, frame.size());
00240
00241 for (int r = 0, r_set = 0; r_set < row_elems && r < convolved.rows;
00242 r += row_delta, ++r_set)
00243 {
00244 for (int c = 0, c_set = 0; c_set < col_elems && c < convolved.cols;
00245 c += col_delta, ++c_set)
00246 {
00247 feats[r_set*col_elems + c_set][f_count] = convolved.at<float>(r,c);
00248 }
00249 }
00250 }
00251 }
00252 return feats;
00253 }
00254
00255 vector<int> LMFilterBank::extractDescriptor(Mat& frame)
00256 {
00257 vector<TextonFeature> raw = extractRawFeature(frame);
00258 vector<int> desc(centers_.size(), 0);
00259 for (unsigned int i = 0; i < raw.size(); ++i)
00260 {
00261 int lbl = quantizeFeature(raw[i]);
00262 desc[lbl] += 1;
00263 }
00264 return desc;
00265 }
00266
00267 Mat LMFilterBank::textonQuantizeImage(Mat& frame)
00268 {
00269 ROS_INFO_STREAM("Extracting raw features");
00270
00271 vector<TextonFeature> feat = extractRawFeature(frame, true);
00272 ROS_INFO_STREAM("Quantizing pixels");
00273
00274 Mat quantized(frame.rows, frame.cols, CV_8UC1);
00275 for (int r = 0, i = 0; r < frame.rows; ++r)
00276 {
00277 for (int c = 0; c < frame.cols; ++c, ++i)
00278 {
00279
00280 TextonFeature current = feat[i];
00281 int lbl = quantizeFeature(current);
00282 quantized.at<uchar>(r,c) = lbl;
00283 }
00284 }
00285 return quantized;
00286 }
00287
00288 int LMFilterBank::quantizeFeature(TextonFeature feat)
00289 {
00290
00291 float min_val = FLT_MAX;
00292 int min_idx = -1;
00293 for (unsigned int i = 0; i < centers_.size(); ++i)
00294 {
00295 float dist = featureDist(feat, centers_[i]);
00296 if (dist < min_val)
00297 {
00298 min_val = dist;
00299 min_idx = i;
00300 }
00301 }
00302
00303 return min_idx;
00304 }
00305
00306 vector<TextonFeature> LMFilterBank::clusterTextons(
00307 vector<vector<TextonFeature> > samples, int k, int attempts)
00308 {
00309
00310 int num_rows = samples.size()*samples[0].size();
00311 Mat row_samples(num_rows, n_filters_*n_extract_scales_, CV_32FC1);
00312
00313 ROS_INFO_STREAM("Converting input data type");
00314
00315 for (unsigned int i = 0, dst_row = 0; i < samples.size(); ++i)
00316 {
00317 for (unsigned int j = 0; j < samples[i].size(); ++j, ++dst_row)
00318 {
00319 for (unsigned int k = 0; k < samples[i][j].size(); ++k)
00320 {
00321 row_samples.at<float>(dst_row, k) = samples[i][j][k];
00322 }
00323 }
00324 }
00325
00326
00327 Mat labels;
00328 Mat centers(k, n_filters_*n_extract_scales_, CV_32FC1);
00329 float epsilon = 0.01;
00330 int kmeans_max_iter = 100000;
00331
00332 ROS_INFO_STREAM("Performing kmeans clustering");
00333
00334 double compactness = cv::kmeans(row_samples, k, labels,
00335 cv::TermCriteria(CV_TERMCRIT_EPS +
00336 CV_TERMCRIT_ITER,
00337 kmeans_max_iter, epsilon),
00338 attempts, cv::KMEANS_PP_CENTERS, centers);
00339
00340 ROS_INFO_STREAM("Compactness is: " << compactness);
00341 ROS_INFO_STREAM("Converting center data type");
00342
00343 vector<TextonFeature> texton_centers;
00344 for (int r = 0; r < centers.rows; ++r)
00345 {
00346 TextonFeature feat;
00347 for (int c = 0; c < centers.cols; ++c)
00348 {
00349 feat.push_back(centers.at<float>(r,c));
00350 }
00351 texton_centers.push_back(feat);
00352 }
00353 return texton_centers;
00354 }
00355
00356
00357
00358
00359
00360 Mat LMFilterBank::createElongatedFilter(float scale, int phase_x, int phase_y,
00361 Mat pts_1, Mat pts_2, int sup)
00362 {
00363 Mat gx = getGaussianDeriv(3.0*scale, 0, pts_1, phase_x);
00364 Mat gy = getGaussianDeriv(scale, 0, pts_2, phase_y);
00365 Mat gxy = gx.mul(gy);
00366 gxy = gxy.reshape(0, sup);
00367 Mat f = normalize(gxy);
00368 return f;
00369 }
00370
00371 Mat LMFilterBank::getGaussianDeriv(float sigma, float mu, Mat x, int ord)
00372 {
00373 Mat X = x - mu;
00374 Mat numerator = X.mul(X);
00375 float variance = sigma*sigma;
00376 float denominator = 2.0*variance;
00377 Mat to_exp = (numerator*-1.0)/denominator;
00378 Mat g(X.size(), CV_32FC1, 1.0/(M_PI*sqrt(denominator)) );
00379
00380 for(int r = 0; r < g.rows; ++r)
00381 {
00382 for(int c = 0; c < g.cols; ++c)
00383 {
00384 g.at<float>(r,c) *= std::exp(to_exp.at<float>(r,c));
00385 }
00386 }
00387
00388 switch(ord)
00389 {
00390 case 1:
00391 g = -1.0*g.mul(x/variance);
00392 break;
00393 case 2:
00394 g = g.mul((numerator - variance) / (variance*variance));
00395 break;
00396 default:
00397 break;
00398 }
00399
00400 return g;
00401 }
00402
00403 Mat LMFilterBank::getDOGFilter(int ksize, float sigma, float alpha)
00404 {
00405 Mat kernel(ksize, ksize, CV_32FC1);
00406 Mat g1 = cv::getGaussianKernel(ksize, sigma, CV_32F);
00407 Mat g2 = cv::getGaussianKernel(ksize, alpha*sigma, CV_32F);
00408 Mat G1 = g1*g1.t();
00409 Mat G2 = g2*g2.t();
00410 Mat G3 = G2 - G1;
00411 return G3;
00412 }
00413
00414 Mat LMFilterBank::upSampleResponse(Mat& m_s, int s, cv::Size size0)
00415 {
00416
00417 Mat m_s_prime;
00418 Mat temp;
00419 m_s.copyTo(m_s_prime);
00420 m_s.copyTo(temp);
00421
00422
00423 vector<cv::Size> sizes;
00424 cv::Size current_size = size0;
00425 for (int i = 0; i < s; i++)
00426 {
00427 sizes.push_back(current_size);
00428 current_size.width /= 2;
00429 current_size.height /= 2;
00430 }
00431
00432 for (int i = 0; i < s; i++)
00433 {
00434 cv::Size up_size;
00435 up_size = sizes.back();
00436 sizes.pop_back();
00437 cv::pyrUp(temp, m_s_prime, up_size);
00438 temp = m_s_prime;
00439 }
00440
00441 return m_s_prime;
00442 }
00443
00444 Mat LMFilterBank::normalize(Mat& f)
00445 {
00446 Mat n(f.size(), CV_32FC1);
00447 n = f - mean(f)[0];
00448 n = n * 1.0/sum(abs(n))[0];
00449 return n;
00450 }
00451
00452 float LMFilterBank::featureDist(TextonFeature f1, TextonFeature f2)
00453 {
00454
00455 ROS_ASSERT_MSG(f1.size() == f2.size(),
00456 "f1 has size: %d. f2 has size: %d",
00457 (int)f1.size(), (int)f2.size());
00458 float dist = 0.0;
00459
00460 for (unsigned int i = 0; i < f1.size(); ++i)
00461 {
00462 dist += std::abs(f1[i]-f2[i]);
00463 }
00464
00465 return dist;
00466 }
00467
00468
00469
00470
00471 void LMFilterBank::visualizeKernels(void)
00472 {
00473 for(unsigned int i = 0; i < edges_.size(); ++i)
00474 {
00475 double min_val, max_val;
00476 cv::minMaxLoc(edges_[i], &min_val, &max_val);
00477 Mat viewable;
00478 edges_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00479 std::stringstream name;
00480 name << "Edge" << i << std::endl;
00481 cv::imshow(name.str(), viewable);
00482 }
00483 for(unsigned int i = 0; i < bars_.size(); ++i)
00484 {
00485 double min_val, max_val;
00486 cv::minMaxLoc(bars_[i], &min_val, &max_val);
00487 Mat viewable;
00488 bars_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00489 std::stringstream name;
00490 name << "Bar" << i << std::endl;
00491 cv::imshow(name.str(), viewable);
00492
00493 }
00494 for(unsigned int i = 0; i < gaussians_.size(); ++i)
00495 {
00496 double min_val, max_val;
00497 cv::minMaxLoc(gaussians_[i], &min_val, &max_val);
00498 Mat viewable;
00499 gaussians_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00500 std::stringstream name;
00501 name << "Gaussian" << i << std::endl;
00502 cv::imshow(name.str(), viewable);
00503
00504 }
00505 for(unsigned int i = 0; i < dogs_.size(); ++i)
00506 {
00507 double min_val, max_val;
00508 cv::minMaxLoc(dogs_[i], &min_val, &max_val);
00509 Mat viewable;
00510 dogs_[i].convertTo(viewable, CV_32F, 1.0/max_val);
00511 std::stringstream name;
00512 name << "DOGs" << i << std::endl;
00513 cv::imshow(name.str(), viewable);
00514 }
00515 cv::waitKey();
00516 }
00517
00518 void LMFilterBank::saveTextonCenters(vector<TextonFeature> centers,
00519 std::string file_name)
00520 {
00521 ROS_INFO_STREAM("Saving texton centers of size " << centers.size()
00522 << " to " << file_name.c_str());
00523 std::ofstream output(file_name.c_str());
00524 for (unsigned int i = 0; i < centers.size(); ++i)
00525 {
00526 for (unsigned int j = 0; j < centers[i].size(); ++j)
00527 {
00528 output << centers[i][j];
00529 if (j < centers[i].size() - 1)
00530 output << " ";
00531 }
00532 output << std::endl;
00533 }
00534
00535 output.close();
00536 }
00537
00538 bool LMFilterBank::readTextonCenters(std::string file_name)
00539 {
00540 std::ifstream input(file_name.c_str());
00541
00542 if (!input.good())
00543 {
00544 ROS_ERROR_STREAM("Failed to open file" << file_name);
00545 return false;
00546 }
00547
00548
00549 centers_.clear();
00550
00551 char line[1024];
00552
00553
00554 while( input.good() )
00555 {
00556 std::stringstream input_line(std::stringstream::in |
00557 std::stringstream::out);
00558 input.getline(line, 1024);
00559 input_line << line;
00560
00561 TextonFeature center;
00562
00563
00564 for(int i = 0; i < n_filters_*n_extract_scales_ && !input.eof(); ++i)
00565 {
00566 float val;
00567 input_line >> val;
00568 center.push_back(val);
00569 }
00570 centers_.push_back(center);
00571 }
00572 input.close();
00573
00574
00575
00576 if( centers_.back().size() != centers_[0].size() )
00577 {
00578 centers_.pop_back();
00579 }
00580 ROS_INFO_STREAM("Loaded texton codebook of size: " << centers_.size());
00581 return true;
00582 }
00583
00584 }