00001
00002
00003 #include <jsk_perception/sliding_window_object_detector.h>
00004 #include <boost/assign.hpp>
00005 #include <jsk_topic_tools/log_utils.h>
00006 #include <jsk_recognition_msgs/Rect.h>
00007 #include <jsk_perception/NonMaximumSuppression.h>
00008
00009 namespace jsk_perception
00010 {
00011 void SlidingWindowObjectDetector::onInit()
00012 {
00013 DiagnosticNodelet::onInit();
00014
00015 this->nms_client_ = pnh_->serviceClient<
00016 jsk_perception::NonMaximumSuppression>("non_maximum_suppression");
00017 this->srv_ = boost::make_shared<dynamic_reconfigure::Server<
00018 jsk_perception::SlidingWindowObjectDetectorConfig> >(*pnh_);
00019 dynamic_reconfigure::Server<
00020 jsk_perception::SlidingWindowObjectDetectorConfig>::CallbackType f =
00021 boost::bind(&SlidingWindowObjectDetector::configCallback, this, _1, _2);
00022 this->srv_->setCallback(f);
00023
00024 pnh_->getParam("run_type", this->run_type_);
00025 pnh_->getParam("trainer_manifest", this->trainer_manifest_filename_);
00026
00027 ROS_INFO("RUN TYPE: %s", run_type_.c_str());
00028 ROS_INFO("LOADED TRAINER MANIFEST: %s", trainer_manifest_filename_.c_str());
00029
00030 this->readTrainingManifestFromDirectory();
00031 loadTrainedDetectorModel();
00032 if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00033 try {
00034 std::string _topic = "/dataset/background/roi";
00035 boost::shared_ptr<rosbag::Bag> tmp_bag(new rosbag::Bag);
00036 tmp_bag->open(this->ndataset_path_, rosbag::bagmode::Read);
00037 ROS_INFO("Bag Found and Opened Successfully ...");
00038 std::vector<std::string> topics;
00039 topics.push_back(_topic);
00040 rosbag::View view(*tmp_bag, rosbag::TopicQuery(topics));
00041 std::vector<sensor_msgs::Image> tmp_imgs;
00042 BOOST_FOREACH(rosbag::MessageInstance const m, view) {
00043 sensor_msgs::Image::ConstPtr img_msg = m.instantiate<
00044 sensor_msgs::Image>();
00045 tmp_imgs.push_back(*img_msg);
00046 }
00047 tmp_bag->close();
00048 this->rosbag_ = boost::shared_ptr<rosbag::Bag>(new rosbag::Bag);
00049 this->rosbag_->open(this->ndataset_path_, rosbag::bagmode::Write);
00050 for (std::vector<sensor_msgs::Image>::iterator it = tmp_imgs.begin();
00051 it != tmp_imgs.end(); it++) {
00052 this->rosbag_->write(_topic, ros::Time::now(), *it);
00053 }
00054 } catch (ros::Exception &e) {
00055 ROS_ERROR("ERROR: Bag File not found..\n%s", e.what());
00056 std::_Exit(EXIT_FAILURE);
00057 }
00058 }
00059 this->pub_rects_ = advertise<jsk_recognition_msgs::RectArray>(
00060 *pnh_, "output/rects", 1);
00061 this->pub_image_ = advertise<sensor_msgs::Image>(
00062 *pnh_, "output/image", 1);
00063 onInitPostProcess();
00064 }
00065
00066 void SlidingWindowObjectDetector::subscribe()
00067 {
00068 ROS_INFO("Subscribing...");
00069 this->sub_image_ = pnh_->subscribe(
00070 "input", 1, &SlidingWindowObjectDetector::imageCb, this);
00071 ros::V_string names = boost::assign::list_of("~input");
00072 jsk_topic_tools::warnNoRemap(names);
00073 }
00074
00075 void SlidingWindowObjectDetector::unsubscribe()
00076 {
00077 NODELET_DEBUG("Unsubscribing from ROS topic.");
00078 this->sub_image_.shutdown();
00079 }
00080
00081 void SlidingWindowObjectDetector::imageCb(const sensor_msgs::ImageConstPtr& msg)
00082 {
00083 cv_bridge::CvImagePtr cv_ptr;
00084 try {
00085 cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
00086 } catch (cv_bridge::Exception& e) {
00087 ROS_ERROR("cv_bridge exception: %s", e.what());
00088 return;
00089 }
00090 cv::Mat image;
00091 cv::Size isize = cv_ptr->image.size();
00092
00093 const int downsize = this->downsize_;
00094 const float scale = this->scale_;
00095 const int img_stack = this->stack_size_;
00096 const int incrementor = this->incrementor_;
00097 cv::resize(cv_ptr->image, image, cv::Size(
00098 isize.width/downsize, isize.height/downsize));
00099 std::multimap<float, cv::Rect_<int> > detection_info =
00100 this->runSlidingWindowDetector(image, cv::Size(
00101 this->swindow_x, this->swindow_y),
00102 scale, img_stack, incrementor);
00103 cv::Mat dimg = image.clone();
00104 ROS_INFO("--Info Size: %ld", detection_info.size());
00105 for (std::multimap<float, cv::Rect_<int> >::iterator
00106 it = detection_info.begin(); it != detection_info.end(); it++) {
00107 cv::rectangle(dimg, it->second, cv::Scalar(0, 0, 255), 2);
00108 }
00109 if (this->run_type_.compare("DETECTOR") == 0) {
00110 const float nms_threshold = 0.01;
00111 std::vector<cv::Rect_<int> > object_rects = this->nonMaximumSuppression(
00112 detection_info, nms_threshold);
00113 cv::Mat bimg = image.clone();
00114 for (std::vector<cv::Rect_<int> >::iterator it = object_rects.begin();
00115 it != object_rects.end(); it++) {
00116 this->setBoundingBoxLabel(bimg, *it);
00117 cv::rectangle(bimg, *it, cv::Scalar(0, 255, 0), 1);
00118 }
00119 jsk_recognition_msgs::RectArray jsk_rect_array;
00120 this->convertCvRectToJSKRectArray(
00121 object_rects, jsk_rect_array, downsize, isize);
00122 jsk_rect_array.header = msg->header;
00123 cv_bridge::CvImagePtr out_msg(new cv_bridge::CvImage);
00124 out_msg->header = msg->header;
00125 out_msg->encoding = sensor_msgs::image_encodings::BGR8;
00126 out_msg->image = bimg.clone();
00127 this->pub_rects_.publish(jsk_rect_array);
00128 this->pub_image_.publish(out_msg->toImageMsg());
00129 } else if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00130 for (std::multimap<float, cv::Rect_<int> >::const_iterator
00131 it = detection_info.begin(); it != detection_info.end(); it++) {
00132 cv::Mat roi = image(it->second).clone();
00133 cv::resize(
00134 roi, roi, cv::Size(
00135 roi.cols * this->downsize_, roi.rows * this->downsize_));
00136 if (roi.data) {
00137 ROS_INFO("Writing to bag file");
00138 cv_bridge::CvImagePtr write_roi(new cv_bridge::CvImage);
00139 write_roi->header = msg->header;
00140 write_roi->encoding = sensor_msgs::image_encodings::BGR8;
00141 write_roi->image = roi.clone();
00142 this->rosbag_->write("/dataset/background/roi",
00143 ros::Time::now(), write_roi->toImageMsg());
00144 cv::imshow("write_roi", roi);
00145 cv::waitKey(3);
00146 }
00147 }
00148 } else {
00149 this->pub_image_.publish(cv_ptr->toImageMsg());
00150 ROS_ERROR("NODELET RUNTYPE IS NOT SET.");
00151 std::_Exit(EXIT_FAILURE);
00152 }
00153 }
00154
00155 std::multimap<float, cv::Rect_<int> >
00156 SlidingWindowObjectDetector::runSlidingWindowDetector(
00157 const cv::Mat &image, const cv::Size wsize, const float scale,
00158 const int scale_counter, const int incrementor)
00159 {
00160 if (image.empty()) {
00161 ROS_ERROR("--INPUT IMAGE IS EMPTY");
00162 return std::multimap<float, cv::Rect_<int> >();
00163 }
00164 cv::Size nwsize = wsize;
00165 int scounter = 0;
00166 std::multimap<float, cv::Rect_<int> > detection_info;
00167 int sw_incrementor = incrementor;
00168 while (scounter++ < scale_counter) {
00169 this->objectRecognizer(image, detection_info, nwsize, sw_incrementor);
00170 this->pyramidialScaling(nwsize, scale);
00171 sw_incrementor += (sw_incrementor * scale);
00172 }
00173 return detection_info;
00174 }
00175
00176 void SlidingWindowObjectDetector::objectRecognizer(
00177 const cv::Mat &image, std::multimap<float, cv::Rect_<int> > &detection_info,
00178 const cv::Size wsize, const int incrementor)
00179 {
00180 for (int j = 0; j < image.rows; j += incrementor) {
00181 for (int i = 0; i < image.cols; i += incrementor) {
00182 cv::Rect_<int> rect = cv::Rect_<int>(i, j, wsize.width, wsize.height);
00183 if ((rect.x + rect.width <= image.cols) &&
00184 (rect.y + rect.height <= image.rows)) {
00185 cv::Mat roi = image(rect).clone();
00186 cv::GaussianBlur(roi, roi, cv::Size(3, 3), 1.0);
00187 cv::resize(roi, roi, cv::Size(this->swindow_x, this->swindow_y));
00188 cv::Mat hog_feature = this->computeHOG(roi);
00189 cv::Mat hsv_feature;
00190 this->computeHSHistogram(roi, hsv_feature, 16, 16, true);
00191 hsv_feature = hsv_feature.reshape(1, 1);
00192 cv::Mat _feature = hog_feature;
00193 this->concatenateCVMat(hog_feature, hsv_feature, _feature);
00194 #if CV_MAJOR_VERSION >= 3
00195 cv::Mat _ret;
00196 float response = this->supportVectorMachine_->predict(
00197 _feature, _ret, false);
00198 #else
00199 float response = this->supportVectorMachine_->predict(
00200 _feature, false);
00201 #endif
00202 if (response == 1) {
00203 detection_info.insert(std::make_pair(response, rect));
00204 } else {
00205 continue;
00206 }
00207 }
00208 }
00209 }
00210 }
00211
00215 void SlidingWindowObjectDetector::computeHSHistogram(
00216 cv::Mat &src, cv::Mat &hist, const int hBin, const int sBin, bool is_norm)
00217 {
00218 if (src.empty()) {
00219 return;
00220 }
00221 cv::Mat hsv;
00222 cv::cvtColor(src, hsv, CV_BGR2HSV);
00223 int histSize[] = {hBin, sBin};
00224 float h_ranges[] = {0, 180};
00225 float s_ranges[] = {0, 256};
00226 const float* ranges[] = {h_ranges, s_ranges};
00227 int channels[] = {0, 1};
00228 cv::calcHist(
00229 &hsv, 1, channels, cv::Mat(), hist, 2, histSize, ranges, true, false);
00230 if (is_norm) {
00231 cv::normalize(hist, hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());
00232 }
00233 }
00234
00235 void SlidingWindowObjectDetector::pyramidialScaling(
00236 cv::Size &wsize, const float scale)
00237 {
00238 float nwidth = wsize.width + (wsize.width * scale);
00239 float nheight = wsize.height + (wsize.height * scale);
00240 const int min_swindow_size = 16;
00241 nwidth = (nwidth < min_swindow_size) ? min_swindow_size : nwidth;
00242 nheight = (nheight < min_swindow_size) ? min_swindow_size : nheight;
00243 wsize = cv::Size(std::abs(nwidth), std::abs(nheight));
00244 }
00245
00246 std::vector<cv::Rect_<int> > SlidingWindowObjectDetector::nonMaximumSuppression(
00247 std::multimap<float, cv::Rect_<int> > &detection_info,
00248 const float nms_threshold)
00249 {
00250 if (detection_info.empty()) {
00251 return std::vector<cv::Rect_<int> >();
00252 }
00253 jsk_perception::NonMaximumSuppression srv_nms;
00254 std::vector<jsk_recognition_msgs::Rect> rect_msg;
00255 for (std::multimap<float, cv::Rect_<int> >::iterator
00256 it = detection_info.begin(); it != detection_info.end(); it++) {
00257 cv::Rect_<int> cv_rect = it->second;
00258 jsk_recognition_msgs::Rect jsk_rect;
00259 jsk_rect.x = cv_rect.x;
00260 jsk_rect.y = cv_rect.y;
00261 jsk_rect.width = cv_rect.width;
00262 jsk_rect.height = cv_rect.height;
00263 srv_nms.request.rect.push_back(jsk_rect);
00264 }
00265 srv_nms.request.threshold = nms_threshold;
00266 std::vector<cv::Rect_<int> > bbox;
00267 if (this->nms_client_.call(srv_nms)) {
00268 for (int i = 0; i < srv_nms.response.bbox_count; i++) {
00269 cv::Rect_<int> brect = cv::Rect_<int>(
00270 srv_nms.response.bbox[i].x,
00271 srv_nms.response.bbox[i].y,
00272 srv_nms.response.bbox[i].width,
00273 srv_nms.response.bbox[i].height);
00274 bbox.push_back(brect);
00275 }
00276 } else {
00277 ROS_ERROR("Failed to call NonMaximumSuppression Module");
00278 return std::vector<cv::Rect_<int> >();
00279 }
00280 return bbox;
00281 }
00282
00283 void SlidingWindowObjectDetector::concatenateCVMat(
00284 const cv::Mat &mat_1, const cv::Mat &mat_2,
00285 cv::Mat &featureMD, bool iscolwise)
00286 {
00287 if (iscolwise) {
00288 featureMD = cv::Mat(mat_1.rows, (mat_1.cols + mat_2.cols), CV_32F);
00289 for (int i = 0; i < featureMD.rows; i++) {
00290 for (int j = 0; j < mat_1.cols; j++) {
00291 featureMD.at<float>(i, j) = mat_1.at<float>(i, j);
00292 }
00293 for (int j = mat_1.cols; j < featureMD.cols; j++) {
00294 featureMD.at<float>(i, j) = mat_2.at<float>(i, j - mat_1.cols);
00295 }
00296 }
00297 } else {
00298 featureMD = cv::Mat((mat_1.rows + mat_2.rows), mat_1.cols, CV_32F);
00299 for (int i = 0; i < featureMD.cols; i++) {
00300 for (int j = 0; j < mat_1.rows; j++) {
00301 featureMD.at<float>(j, i) = mat_1.at<float>(j, i);
00302 }
00303 for (int j = mat_1.rows; j < featureMD.rows; j++) {
00304 featureMD.at<float>(j, i) = mat_2.at<float>(j - mat_1.rows, i);
00305 }
00306 }
00307 }
00308 }
00309
00310 void SlidingWindowObjectDetector::convertCvRectToJSKRectArray(
00311 const std::vector<cv::Rect_<int> > &bounding_boxes,
00312 jsk_recognition_msgs::RectArray &jsk_rects,
00313 const int downsize, const cv::Size img_sz)
00314 {
00315 for (std::vector<cv::Rect_<int> >::const_iterator it =
00316 bounding_boxes.begin(); it != bounding_boxes.end(); it++) {
00317 jsk_recognition_msgs::Rect j_r;
00318 j_r.x = it->x * downsize;
00319 j_r.y = it->y * downsize;
00320 j_r.width = it->width * downsize;
00321 j_r.height = it->height * downsize;
00322 jsk_rects.rects.push_back(j_r);
00323 }
00324 }
00325
00326 void SlidingWindowObjectDetector::loadTrainedDetectorModel()
00327 {
00328 try {
00329 ROS_INFO("--Loading Trained SVM Classifier");
00330 #if CV_MAJOR_VERSION >= 3 // http://docs.opencv.org/master/d3/d46/classcv_1_1Algorithm.html
00331 this->supportVectorMachine_ = cv::ml::SVM::create();
00332 this->supportVectorMachine_ = cv::Algorithm::load<cv::ml::SVM>(this->model_name_);
00333 #else
00334 this->supportVectorMachine_ = boost::shared_ptr<cv::SVM>(new cv::SVM);
00335 this->supportVectorMachine_->load(this->model_name_.c_str());
00336 #endif
00337 ROS_INFO("--Classifier Loaded Successfully");
00338 } catch(cv::Exception &e) {
00339 ROS_ERROR("--ERROR: Fail to load Classifier \n%s", e.what());
00340 std::_Exit(EXIT_FAILURE);
00341 }
00342 }
00343
00344 void SlidingWindowObjectDetector::readTrainingManifestFromDirectory()
00345 {
00346 cv::FileStorage fs = cv::FileStorage(
00347 this->trainer_manifest_filename_, cv::FileStorage::READ);
00348 if (!fs.isOpened()) {
00349 ROS_ERROR("TRAINER MANIFEST NOT FOUND..");
00350 std::_Exit(EXIT_FAILURE);
00351 }
00352 cv::FileNode n = fs["TrainerInfo"];
00353 std::string ttype = n["trainer_type"];
00354 std::string tpath = n["trainer_path"];
00355 this->model_name_ = tpath;
00356
00357 n = fs["FeatureInfo"];
00358 int hog = static_cast<int>(n["HOG"]);
00359 int lbp = static_cast<int>(n["LBP"]);
00360
00361 n = fs["SlidingWindowInfo"];
00362 int sw_x = static_cast<int>(n["swindow_x"]);
00363 int sw_y = static_cast<int>(n["swindow_y"]);
00364 this->swindow_x = sw_x;
00365 this->swindow_y = sw_y;
00366
00367 n = fs["TrainingDatasetDirectoryInfo"];
00368 std::string pfile = n["object_dataset_filename"];
00369 std::string nfile = n["nonobject_dataset_filename"];
00370 std::string dataset_path = n["dataset_path"];
00371 this->object_dataset_filename_ = pfile;
00372 this->nonobject_dataset_filename_ = nfile;
00373 this->ndataset_path_ = dataset_path + nfile;
00374 }
00375
00376 void SlidingWindowObjectDetector::setBoundingBoxLabel(
00377 cv::Mat& im, cv::Rect_<int> rect, const std::string label)
00378 {
00379 int fontface = cv::FONT_HERSHEY_SIMPLEX;
00380 double scale = 0.2;
00381 int thickness = 1;
00382 int baseline = 0;
00383 cv::Size text = cv::getTextSize(
00384 label, fontface, scale, thickness, &baseline);
00385 cv::Point pt = cv::Point(
00386 rect.x + (rect.width-text.width), rect.y + (rect.height+text.height));
00387 cv::rectangle(im, pt + cv::Point(0, baseline),
00388 pt + cv::Point(text.width, -text.height),
00389 CV_RGB(0, 0, 255), CV_FILLED);
00390 cv::putText(im, label, pt,
00391 fontface, scale, CV_RGB(255, 0, 0), thickness, 8);
00392 }
00393
00394 void SlidingWindowObjectDetector::configCallback(
00395 jsk_perception::SlidingWindowObjectDetectorConfig &config, uint32_t level)
00396 {
00397 boost::mutex::scoped_lock lock(mutex_);
00398 this->scale_ = static_cast<float>(config.scaling_factor);
00399 this->stack_size_ = static_cast<int>(config.stack_size);
00400 this->incrementor_ = config.sliding_window_increment;
00401 this->downsize_ = config.image_downsize;
00402
00403
00404
00405
00406 }
00407 }
00408
00409 #include <pluginlib/class_list_macros.h>
00410 PLUGINLIB_EXPORT_CLASS(jsk_perception::SlidingWindowObjectDetector, nodelet::Nodelet);