sliding_window_object_detector.cpp
Go to the documentation of this file.
1 // @author Krishneel Chaudhary, JSK
2 
4 #include <boost/assign.hpp>
6 #include <jsk_recognition_msgs/Rect.h>
7 #include <jsk_perception/NonMaximumSuppression.h>
8 #if ( CV_MAJOR_VERSION >= 4)
9 #include <opencv2/imgproc/imgproc_c.h>
10 #endif
11 
12 namespace jsk_perception
13 {
15  {
16  DiagnosticNodelet::onInit();
17 
18  this->nms_client_ = pnh_->serviceClient<
19  jsk_perception::NonMaximumSuppression>("non_maximum_suppression");
20  this->srv_ = boost::make_shared<dynamic_reconfigure::Server<
21  jsk_perception::SlidingWindowObjectDetectorConfig> >(*pnh_);
22  dynamic_reconfigure::Server<
23  jsk_perception::SlidingWindowObjectDetectorConfig>::CallbackType f =
24  boost::bind(&SlidingWindowObjectDetector::configCallback, this, _1, _2);
25  this->srv_->setCallback(f);
26 
27  pnh_->getParam("run_type", this->run_type_);
28  pnh_->getParam("trainer_manifest", this->trainer_manifest_filename_);
29  pnh_->param<bool>("override_manifest", this->override_manifest_, false);
30 
31  ROS_INFO("RUN TYPE: %s", run_type_.c_str());
32  ROS_INFO("LOADED TRAINER MANIFEST: %s", trainer_manifest_filename_.c_str());
33 
36  if (this->run_type_.compare("BOOTSTRAPER") == 0) {
37  try {
38  std::string _topic = "/dataset/background/roi";
40  tmp_bag->open(this->ndataset_path_, rosbag::bagmode::Read);
41  ROS_INFO("Bag Found and Opened Successfully ...");
42  std::vector<std::string> topics;
43  topics.push_back(_topic);
44  rosbag::View view(*tmp_bag, rosbag::TopicQuery(topics));
45  std::vector<sensor_msgs::Image> tmp_imgs;
46  BOOST_FOREACH(rosbag::MessageInstance const m, view) {
47  sensor_msgs::Image::ConstPtr img_msg = m.instantiate<
48  sensor_msgs::Image>();
49  tmp_imgs.push_back(*img_msg);
50  }
51  tmp_bag->close();
53  this->rosbag_->open(this->ndataset_path_, rosbag::bagmode::Write);
54  for (std::vector<sensor_msgs::Image>::iterator it = tmp_imgs.begin();
55  it != tmp_imgs.end(); it++) {
56  this->rosbag_->write(_topic, ros::Time::now(), *it);
57  }
58  } catch (ros::Exception &e) {
59  ROS_ERROR("ERROR: Bag File not found..\n%s", e.what());
60  std::_Exit(EXIT_FAILURE);
61  }
62  }
63  this->pub_rects_ = advertise<jsk_recognition_msgs::RectArray>(
64  *pnh_, "output/rects", 1);
65  this->pub_image_ = advertise<sensor_msgs::Image>(
66  *pnh_, "output/image", 1);
68  }
69 
71  {
72  ROS_INFO("Subscribing...");
73  this->sub_image_ = pnh_->subscribe(
74  "input", 1, &SlidingWindowObjectDetector::imageCb, this);
75  ros::V_string names = boost::assign::list_of("~input");
77  }
78 
80  {
81  NODELET_DEBUG("Unsubscribing from ROS topic.");
82  this->sub_image_.shutdown();
83  }
84 
85  void SlidingWindowObjectDetector::imageCb(const sensor_msgs::ImageConstPtr& msg)
86  {
87  cv_bridge::CvImagePtr cv_ptr;
88  try {
90  } catch (cv_bridge::Exception& e) {
91  ROS_ERROR("cv_bridge exception: %s", e.what());
92  return;
93  }
94  cv::Mat image;
95  cv::Size isize = cv_ptr->image.size();
96  // control params
97  const int downsize = this->downsize_;
98  const float scale = this->scale_;
99  const int img_stack = this->stack_size_;
100  const int incrementor = this->incrementor_;
101  cv::resize(cv_ptr->image, image, cv::Size(
102  isize.width/downsize, isize.height/downsize));
103  std::multimap<float, cv::Rect_<int> > detection_info =
104  this->runSlidingWindowDetector(image, cv::Size(
105  this->swindow_x, this->swindow_y),
106  scale, img_stack, incrementor);
107  cv::Mat dimg = image.clone();
108  ROS_INFO("--Info Size: %ld", detection_info.size());
109  for (std::multimap<float, cv::Rect_<int> >::iterator
110  it = detection_info.begin(); it != detection_info.end(); it++) {
111  cv::rectangle(dimg, it->second, cv::Scalar(0, 0, 255), 2);
112  }
113  if (this->run_type_.compare("DETECTOR") == 0) {
114  const float nms_threshold = 0.01;
115  std::vector<cv::Rect_<int> > object_rects = this->nonMaximumSuppression(
116  detection_info, nms_threshold);
117  cv::Mat bimg = image.clone();
118  for (std::vector<cv::Rect_<int> >::iterator it = object_rects.begin();
119  it != object_rects.end(); it++) {
120  this->setBoundingBoxLabel(bimg, *it);
121  cv::rectangle(bimg, *it, cv::Scalar(0, 255, 0), 1);
122  }
123  jsk_recognition_msgs::RectArray jsk_rect_array;
125  object_rects, jsk_rect_array, downsize, isize);
126  jsk_rect_array.header = msg->header;
128  out_msg->header = msg->header;
129  out_msg->encoding = sensor_msgs::image_encodings::BGR8;
130  out_msg->image = bimg.clone();
131  this->pub_rects_.publish(jsk_rect_array);
132  this->pub_image_.publish(out_msg->toImageMsg());
133  } else if (this->run_type_.compare("BOOTSTRAPER") == 0) {
134  for (std::multimap<float, cv::Rect_<int> >::const_iterator
135  it = detection_info.begin(); it != detection_info.end(); it++) {
136  cv::Mat roi = image(it->second).clone();
137  cv::resize(
138  roi, roi, cv::Size(
139  roi.cols * this->downsize_, roi.rows * this->downsize_));
140  if (roi.data) {
141  ROS_INFO("Writing to bag file");
143  write_roi->header = msg->header;
144  write_roi->encoding = sensor_msgs::image_encodings::BGR8;
145  write_roi->image = roi.clone();
146  this->rosbag_->write("/dataset/background/roi",
147  ros::Time::now(), write_roi->toImageMsg());
148  cv::imshow("write_roi", roi);
149  cv::waitKey(3);
150  }
151  }
152  } else {
153  this->pub_image_.publish(cv_ptr->toImageMsg());
154  ROS_ERROR("NODELET RUNTYPE IS NOT SET.");
155  std::_Exit(EXIT_FAILURE);
156  }
157  }
158 
159  std::multimap<float, cv::Rect_<int> >
161  const cv::Mat &image, const cv::Size wsize, const float scale,
162  const int scale_counter, const int incrementor)
163  {
164  if (image.empty()) {
165  ROS_ERROR("--INPUT IMAGE IS EMPTY");
166  return std::multimap<float, cv::Rect_<int> >();
167  }
168  cv::Size nwsize = wsize;
169  int scounter = 0;
170  std::multimap<float, cv::Rect_<int> > detection_info;
171  int sw_incrementor = incrementor;
172  while (scounter++ < scale_counter) {
173  this->objectRecognizer(image, detection_info, nwsize, sw_incrementor);
174  this->pyramidialScaling(nwsize, scale);
175  sw_incrementor += (sw_incrementor * scale);
176  }
177  return detection_info;
178  }
179 
181  const cv::Mat &image, std::multimap<float, cv::Rect_<int> > &detection_info,
182  const cv::Size wsize, const int incrementor)
183  {
184  for (int j = 0; j < image.rows; j += incrementor) {
185  for (int i = 0; i < image.cols; i += incrementor) {
186  cv::Rect_<int> rect = cv::Rect_<int>(i, j, wsize.width, wsize.height);
187  if ((rect.x + rect.width <= image.cols) &&
188  (rect.y + rect.height <= image.rows)) {
189  cv::Mat roi = image(rect).clone();
190  cv::GaussianBlur(roi, roi, cv::Size(3, 3), 1.0);
191  cv::resize(roi, roi, cv::Size(this->swindow_x, this->swindow_y));
192  cv::Mat hog_feature = this->computeHOG(roi);
193  cv::Mat hsv_feature;
194  this->computeHSHistogram(roi, hsv_feature, 16, 16, true);
195  hsv_feature = hsv_feature.reshape(1, 1);
196  cv::Mat _feature = hog_feature;
197  this->concatenateCVMat(hog_feature, hsv_feature, _feature);
198 #if CV_MAJOR_VERSION >= 3
199  cv::Mat _ret;
200  float response = this->supportVectorMachine_->predict(
201  _feature, _ret, false);
202 #else
203  float response = this->supportVectorMachine_->predict(
204  _feature, false);
205 #endif
206  if (response == 1) {
207  detection_info.insert(std::make_pair(response, rect));
208  } else {
209  continue;
210  }
211  }
212  }
213  }
214  }
215 
220  cv::Mat &src, cv::Mat &hist, const int hBin, const int sBin, bool is_norm)
221  {
222  if (src.empty()) {
223  return;
224  }
225  cv::Mat hsv;
226  cv::cvtColor(src, hsv, CV_BGR2HSV);
227  int histSize[] = {hBin, sBin};
228  float h_ranges[] = {0, 180};
229  float s_ranges[] = {0, 256};
230  const float* ranges[] = {h_ranges, s_ranges};
231  int channels[] = {0, 1};
232  cv::calcHist(
233  &hsv, 1, channels, cv::Mat(), hist, 2, histSize, ranges, true, false);
234  if (is_norm) {
235  cv::normalize(hist, hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());
236  }
237  }
238 
240  cv::Size &wsize, const float scale)
241  {
242  float nwidth = wsize.width + (wsize.width * scale);
243  float nheight = wsize.height + (wsize.height * scale);
244  const int min_swindow_size = 16;
245  nwidth = (nwidth < min_swindow_size) ? min_swindow_size : nwidth;
246  nheight = (nheight < min_swindow_size) ? min_swindow_size : nheight;
247  wsize = cv::Size(std::abs(nwidth), std::abs(nheight));
248  }
249 
251  std::multimap<float, cv::Rect_<int> > &detection_info,
252  const float nms_threshold)
253  {
254  if (detection_info.empty()) {
255  return std::vector<cv::Rect_<int> >();
256  }
257  jsk_perception::NonMaximumSuppression srv_nms;
258  std::vector<jsk_recognition_msgs::Rect> rect_msg;
259  for (std::multimap<float, cv::Rect_<int> >::iterator
260  it = detection_info.begin(); it != detection_info.end(); it++) {
261  cv::Rect_<int> cv_rect = it->second;
262  jsk_recognition_msgs::Rect jsk_rect;
263  jsk_rect.x = cv_rect.x;
264  jsk_rect.y = cv_rect.y;
265  jsk_rect.width = cv_rect.width;
266  jsk_rect.height = cv_rect.height;
267  srv_nms.request.rect.push_back(jsk_rect);
268  }
269  srv_nms.request.threshold = nms_threshold;
270  std::vector<cv::Rect_<int> > bbox;
271  if (this->nms_client_.call(srv_nms)) {
272  for (int i = 0; i < srv_nms.response.bbox_count; i++) {
273  cv::Rect_<int> brect = cv::Rect_<int>(
274  srv_nms.response.bbox[i].x,
275  srv_nms.response.bbox[i].y,
276  srv_nms.response.bbox[i].width,
277  srv_nms.response.bbox[i].height);
278  bbox.push_back(brect);
279  }
280  } else {
281  ROS_ERROR("Failed to call NonMaximumSuppression Module");
282  return std::vector<cv::Rect_<int> >();
283  }
284  return bbox;
285  }
286 
288  const cv::Mat &mat_1, const cv::Mat &mat_2,
289  cv::Mat &featureMD, bool iscolwise)
290  {
291  if (iscolwise) {
292  featureMD = cv::Mat(mat_1.rows, (mat_1.cols + mat_2.cols), CV_32F);
293  for (int i = 0; i < featureMD.rows; i++) {
294  for (int j = 0; j < mat_1.cols; j++) {
295  featureMD.at<float>(i, j) = mat_1.at<float>(i, j);
296  }
297  for (int j = mat_1.cols; j < featureMD.cols; j++) {
298  featureMD.at<float>(i, j) = mat_2.at<float>(i, j - mat_1.cols);
299  }
300  }
301  } else {
302  featureMD = cv::Mat((mat_1.rows + mat_2.rows), mat_1.cols, CV_32F);
303  for (int i = 0; i < featureMD.cols; i++) {
304  for (int j = 0; j < mat_1.rows; j++) {
305  featureMD.at<float>(j, i) = mat_1.at<float>(j, i);
306  }
307  for (int j = mat_1.rows; j < featureMD.rows; j++) {
308  featureMD.at<float>(j, i) = mat_2.at<float>(j - mat_1.rows, i);
309  }
310  }
311  }
312  }
313 
315  const std::vector<cv::Rect_<int> > &bounding_boxes,
316  jsk_recognition_msgs::RectArray &jsk_rects,
317  const int downsize, const cv::Size img_sz)
318  {
319  for (std::vector<cv::Rect_<int> >::const_iterator it =
320  bounding_boxes.begin(); it != bounding_boxes.end(); it++) {
321  jsk_recognition_msgs::Rect j_r;
322  j_r.x = it->x * downsize;
323  j_r.y = it->y * downsize;
324  j_r.width = it->width * downsize;
325  j_r.height = it->height * downsize;
326  jsk_rects.rects.push_back(j_r);
327  }
328  }
329 
331  {
332  try {
333  ROS_INFO("--Loading Trained SVM Classifier");
334 #if CV_MAJOR_VERSION >= 3 // http://docs.opencv.org/master/d3/d46/classcv_1_1Algorithm.html
335  this->supportVectorMachine_ = cv::ml::SVM::create();
336  this->supportVectorMachine_ = cv::Algorithm::load<cv::ml::SVM>(this->model_name_);
337 #else
339  this->supportVectorMachine_->load(this->model_name_.c_str());
340 #endif
341  ROS_INFO("--Classifier Loaded Successfully");
342  } catch(cv::Exception &e) {
343  ROS_ERROR("--ERROR: Fail to load Classifier \n%s", e.what());
344  std::_Exit(EXIT_FAILURE);
345  }
346  }
347 
349  {
350  cv::FileStorage fs = cv::FileStorage(
351  this->trainer_manifest_filename_, cv::FileStorage::READ);
352  if (!fs.isOpened()) {
353  ROS_ERROR("TRAINER MANIFEST NOT FOUND..");
354  std::_Exit(EXIT_FAILURE);
355  }
356  cv::FileNode n = fs["TrainerInfo"];
357  std::string ttype = n["trainer_type"];
358  std::string tpath = n["trainer_path"];
359 
360  n = fs["FeatureInfo"]; // features used
361  int hog = static_cast<int>(n["HOG"]);
362  int lbp = static_cast<int>(n["LBP"]);
363 
364  n = fs["SlidingWindowInfo"]; // window size
365  int sw_x = static_cast<int>(n["swindow_x"]);
366  int sw_y = static_cast<int>(n["swindow_y"]);
367 
368  n = fs["TrainingDatasetDirectoryInfo"];
369  std::string pfile = n["object_dataset_filename"];
370  std::string nfile = n["nonobject_dataset_filename"];
371  std::string dataset_path = n["dataset_path"];
372 
373  if (this->override_manifest_)
374  {
375  pnh_->param<std::string>("trainer_path", tpath, tpath);
376  pnh_->param<int>("swindow_x", sw_x, sw_x);
377  pnh_->param<int>("swindow_y", sw_y, sw_y);
378  pnh_->param<std::string>("object_dataset_filename", pfile, pfile);
379  pnh_->param<std::string>("nonobject_dataset_filename", nfile, nfile);
380  pnh_->param<std::string>("dataset_path", dataset_path, dataset_path);
381  }
382 
383  this->model_name_ = tpath; // classifier path
384  this->swindow_x = sw_x;
385  this->swindow_y = sw_y;
386  this->object_dataset_filename_ = pfile; // object dataset
387  this->nonobject_dataset_filename_ = nfile;
388  this->ndataset_path_ = dataset_path + nfile; // ~/.ros/dir/negative/x.bag
389 }
390 
392  cv::Mat& im, cv::Rect_<int> rect, const std::string label)
393  {
394  int fontface = cv::FONT_HERSHEY_SIMPLEX;
395  double scale = 0.2;
396  int thickness = 1;
397  int baseline = 0;
398  cv::Size text = cv::getTextSize(
399  label, fontface, scale, thickness, &baseline);
400  cv::Point pt = cv::Point(
401  rect.x + (rect.width-text.width), rect.y + (rect.height+text.height));
402  cv::rectangle(im, pt + cv::Point(0, baseline),
403  pt + cv::Point(text.width, -text.height),
404  CV_RGB(0, 0, 255), CV_FILLED);
405  cv::putText(im, label, pt,
406  fontface, scale, CV_RGB(255, 0, 0), thickness, 8);
407  }
408 
410  jsk_perception::SlidingWindowObjectDetectorConfig &config, uint32_t level)
411  {
412  boost::mutex::scoped_lock lock(mutex_);
413  this->scale_ = static_cast<float>(config.scaling_factor);
414  this->stack_size_ = static_cast<int>(config.stack_size);
415  this->incrementor_ = config.sliding_window_increment;
416  this->downsize_ = config.image_downsize;
417 
418  // currently fixed variables
419  // this->swindow_x = config.sliding_window_width;
420  // this->swindow_y = config.sliding_window_height;
421  }
422 } // namespace jsk_perception
423 
f
void convertCvRectToJSKRectArray(const std::vector< cv::Rect_< int > > &, jsk_recognition_msgs::RectArray &, const int, const cv::Size)
void publish(const boost::shared_ptr< M > &message) const
GLfloat n[6][3]
virtual void imageCb(const sensor_msgs::ImageConstPtr &)
bool call(MReq &req, MRes &res)
virtual void setBoundingBoxLabel(cv::Mat &, cv::Rect_< int >, const std::string="object")
virtual void computeHSHistogram(cv::Mat &, cv::Mat &, const int=64, const int=32, bool=true)
virtual void concatenateCVMat(const cv::Mat &, const cv::Mat &, cv::Mat &, bool=true)
virtual cv::Mat computeHOG(const cv::Mat &)
std::vector< std::string > V_string
virtual std::vector< cv::Rect_< int > > nonMaximumSuppression(std::multimap< float, cv::Rect_< int > > &, const float)
boost::shared_ptr< dynamic_reconfigure::Server< jsk_perception::SlidingWindowObjectDetectorConfig > > srv_
#define ROS_INFO(...)
boost::shared_ptr< ros::NodeHandle > pnh_
virtual std::multimap< float, cv::Rect_< int > > runSlidingWindowDetector(const cv::Mat &, const cv::Size, const float, const int, const int)
bool warnNoRemap(const std::vector< std::string > names)
boost::shared_ptr< T > instantiate() const
PLUGINLIB_EXPORT_CLASS(jsk_perception::SlidingWindowObjectDetector, nodelet::Nodelet)
CvImagePtr toCvCopy(const sensor_msgs::ImageConstPtr &source, const std::string &encoding=std::string())
virtual void pyramidialScaling(cv::Size &, const float)
virtual void configCallback(jsk_perception::SlidingWindowObjectDetectorConfig &, uint32_t)
topics
mutex_t * lock
#define EXIT_FAILURE
static Time now()
virtual void objectRecognizer(const cv::Mat &, std::multimap< float, cv::Rect_< int > > &, const cv::Size, const int=16)
#define ROS_ERROR(...)
#define NODELET_DEBUG(...)


jsk_perception
Author(s): Manabu Saito, Ryohei Ueda
autogenerated on Mon May 3 2021 03:03:27