jsk_perception: sliding_window_object_detector.cpp Source File

Go to the documentation of this file.
00001 // @author Krishneel Chaudhary, JSK
00002 
00003 #include <jsk_perception/sliding_window_object_detector.h>
00004 #include <boost/assign.hpp>
00005 #include <jsk_topic_tools/log_utils.h>
00006 #include <jsk_recognition_msgs/Rect.h>
00007 #include <jsk_perception/NonMaximumSuppression.h>
00008 
00009 namespace jsk_perception
00010 {
00011    void SlidingWindowObjectDetector::onInit()
00012    {
00013       DiagnosticNodelet::onInit();
00014 
00015       this->nms_client_ = pnh_->serviceClient<
00016          jsk_perception::NonMaximumSuppression>("non_maximum_suppression");
00017       this->srv_ = boost::make_shared<dynamic_reconfigure::Server<
00018          jsk_perception::SlidingWindowObjectDetectorConfig> >(*pnh_);
00019       dynamic_reconfigure::Server<
00020          jsk_perception::SlidingWindowObjectDetectorConfig>::CallbackType f =
00021          boost::bind(&SlidingWindowObjectDetector::configCallback, this, _1, _2);
00022       this->srv_->setCallback(f);
00023       
00024       pnh_->getParam("run_type", this->run_type_);
00025       pnh_->getParam("trainer_manifest", this->trainer_manifest_filename_);
00026 
00027       ROS_INFO("RUN TYPE: %s", run_type_.c_str());
00028       ROS_INFO("LOADED TRAINER MANIFEST: %s", trainer_manifest_filename_.c_str());
00029 
00030       this->readTrainingManifestFromDirectory();
00031       loadTrainedDetectorModel();
00032       if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00033          try {
00034             std::string _topic = "/dataset/background/roi";
00035             boost::shared_ptr<rosbag::Bag> tmp_bag(new rosbag::Bag);
00036             tmp_bag->open(this->ndataset_path_, rosbag::bagmode::Read);
00037             ROS_INFO("Bag Found and Opened Successfully ...");
00038             std::vector<std::string> topics;
00039             topics.push_back(_topic);
00040             rosbag::View view(*tmp_bag, rosbag::TopicQuery(topics));
00041             std::vector<sensor_msgs::Image> tmp_imgs;
00042             BOOST_FOREACH(rosbag::MessageInstance const m, view) {
00043                sensor_msgs::Image::ConstPtr img_msg = m.instantiate<
00044                sensor_msgs::Image>();
00045                tmp_imgs.push_back(*img_msg);
00046             }
00047             tmp_bag->close();
00048             this->rosbag_ = boost::shared_ptr<rosbag::Bag>(new rosbag::Bag);
00049             this->rosbag_->open(this->ndataset_path_, rosbag::bagmode::Write);
00050             for (std::vector<sensor_msgs::Image>::iterator it = tmp_imgs.begin();
00051                  it != tmp_imgs.end(); it++) {
00052                this->rosbag_->write(_topic, ros::Time::now(), *it);
00053             }
00054          } catch (ros::Exception &e) {
00055             ROS_ERROR("ERROR: Bag File not found..\n%s", e.what());
00056             std::_Exit(EXIT_FAILURE);
00057          }         
00058       }
00059       this->pub_rects_ = advertise<jsk_recognition_msgs::RectArray>(
00060          *pnh_, "output/rects", 1);
00061       this->pub_image_ = advertise<sensor_msgs::Image>(
00062          *pnh_, "output/image", 1);
00063       onInitPostProcess();
00064    }
00065 
00066    void SlidingWindowObjectDetector::subscribe()
00067    {
00068       ROS_INFO("Subscribing...");
00069       this->sub_image_ = pnh_->subscribe(
00070          "input", 1, &SlidingWindowObjectDetector::imageCb, this);
00071       ros::V_string names = boost::assign::list_of("~input");
00072       jsk_topic_tools::warnNoRemap(names);
00073    }
00074    
00075    void SlidingWindowObjectDetector::unsubscribe()
00076    {
00077       NODELET_DEBUG("Unsubscribing from ROS topic.");
00078       this->sub_image_.shutdown();
00079    }
00080    
00081    void SlidingWindowObjectDetector::imageCb(const sensor_msgs::ImageConstPtr& msg)
00082    {
00083       cv_bridge::CvImagePtr cv_ptr;
00084       try {
00085          cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
00086       } catch (cv_bridge::Exception& e) {
00087          ROS_ERROR("cv_bridge exception: %s", e.what());
00088          return;
00089       }      
00090       cv::Mat image;
00091       cv::Size isize = cv_ptr->image.size();
00092       // control params
00093       const int downsize = this->downsize_;
00094       const float scale = this->scale_;
00095       const int img_stack = this->stack_size_;
00096       const int incrementor = this->incrementor_;
00097       cv::resize(cv_ptr->image, image, cv::Size(
00098                     isize.width/downsize, isize.height/downsize));
00099       std::multimap<float, cv::Rect_<int> > detection_info =
00100          this->runSlidingWindowDetector(image, cv::Size(
00101                                       this->swindow_x, this->swindow_y),
00102                                    scale, img_stack, incrementor);
00103       cv::Mat dimg = image.clone();
00104       ROS_INFO("--Info Size: %ld", detection_info.size());
00105       for (std::multimap<float, cv::Rect_<int> >::iterator
00106               it = detection_info.begin(); it != detection_info.end(); it++) {
00107          cv::rectangle(dimg, it->second, cv::Scalar(0, 0, 255), 2);
00108       }
00109       if (this->run_type_.compare("DETECTOR") == 0) {
00110          const float nms_threshold = 0.01;
00111          std::vector<cv::Rect_<int> > object_rects = this->nonMaximumSuppression(
00112             detection_info, nms_threshold);
00113          cv::Mat bimg = image.clone();
00114          for (std::vector<cv::Rect_<int> >::iterator it = object_rects.begin();
00115               it != object_rects.end(); it++) {
00116             this->setBoundingBoxLabel(bimg, *it);
00117             cv::rectangle(bimg, *it, cv::Scalar(0, 255, 0), 1);
00118          }
00119          jsk_recognition_msgs::RectArray jsk_rect_array;
00120          this->convertCvRectToJSKRectArray(
00121             object_rects, jsk_rect_array, downsize, isize);
00122          jsk_rect_array.header = msg->header;
00123          cv_bridge::CvImagePtr out_msg(new cv_bridge::CvImage);
00124          out_msg->header = msg->header;
00125          out_msg->encoding = sensor_msgs::image_encodings::BGR8;
00126          out_msg->image = bimg.clone();
00127          this->pub_rects_.publish(jsk_rect_array);
00128          this->pub_image_.publish(out_msg->toImageMsg());
00129       } else if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00130          for (std::multimap<float, cv::Rect_<int> >::const_iterator
00131                  it = detection_info.begin(); it != detection_info.end(); it++) {
00132             cv::Mat roi = image(it->second).clone();
00133             cv::resize(
00134                roi, roi, cv::Size(
00135                   roi.cols * this->downsize_, roi.rows * this->downsize_));
00136             if (roi.data) {
00137                ROS_INFO("Writing to bag file");
00138                cv_bridge::CvImagePtr write_roi(new cv_bridge::CvImage);
00139                write_roi->header = msg->header;
00140                write_roi->encoding = sensor_msgs::image_encodings::BGR8;
00141                write_roi->image = roi.clone();
00142                this->rosbag_->write("/dataset/background/roi",
00143                                     ros::Time::now(), write_roi->toImageMsg());
00144                cv::imshow("write_roi", roi);
00145                cv::waitKey(3);
00146             }
00147          }
00148       } else {
00149          this->pub_image_.publish(cv_ptr->toImageMsg());
00150          ROS_ERROR("NODELET RUNTYPE IS NOT SET.");
00151          std::_Exit(EXIT_FAILURE);
00152       }
00153    }
00154 
00155    std::multimap<float, cv::Rect_<int> >
00156    SlidingWindowObjectDetector::runSlidingWindowDetector(
00157       const cv::Mat &image, const cv::Size wsize, const float scale,
00158       const int scale_counter, const int incrementor)
00159    {
00160       if (image.empty()) {
00161          ROS_ERROR("--INPUT IMAGE IS EMPTY");
00162          return std::multimap<float, cv::Rect_<int> >();
00163       }
00164       cv::Size nwsize = wsize;
00165       int scounter = 0;
00166       std::multimap<float, cv::Rect_<int> > detection_info;
00167       int sw_incrementor = incrementor;
00168       while (scounter++ < scale_counter) {
00169          this->objectRecognizer(image, detection_info, nwsize, sw_incrementor);
00170          this->pyramidialScaling(nwsize, scale);
00171          sw_incrementor += (sw_incrementor * scale);
00172       }
00173       return detection_info;
00174    }
00175 
00176    void SlidingWindowObjectDetector::objectRecognizer(
00177       const cv::Mat &image, std::multimap<float, cv::Rect_<int> > &detection_info,
00178       const cv::Size wsize, const int incrementor)
00179    {
00180       for (int j = 0; j < image.rows; j += incrementor) {
00181          for (int i = 0; i < image.cols; i += incrementor) {
00182             cv::Rect_<int> rect = cv::Rect_<int>(i, j, wsize.width, wsize.height);
00183             if ((rect.x + rect.width <= image.cols) &&
00184                 (rect.y + rect.height <= image.rows)) {
00185                cv::Mat roi = image(rect).clone();
00186                cv::GaussianBlur(roi, roi, cv::Size(3, 3), 1.0);
00187                cv::resize(roi, roi, cv::Size(this->swindow_x, this->swindow_y));
00188                cv::Mat hog_feature = this->computeHOG(roi);
00189                cv::Mat hsv_feature;
00190                this->computeHSHistogram(roi, hsv_feature, 16, 16, true);
00191                hsv_feature = hsv_feature.reshape(1, 1);
00192                cv::Mat _feature = hog_feature;
00193                this->concatenateCVMat(hog_feature, hsv_feature, _feature);
00194 #if CV_MAJOR_VERSION >= 3
00195                cv::Mat _ret;
00196                float response = this->supportVectorMachine_->predict(
00197                                                                      _feature, _ret, false);
00198 #else
00199                float response = this->supportVectorMachine_->predict(
00200                   _feature, false);
00201 #endif
00202                if (response == 1) {
00203                   detection_info.insert(std::make_pair(response, rect));
00204                } else {
00205                   continue;
00206                }
00207             }
00208          }
00209       }
00210    }
00211 
00215    void SlidingWindowObjectDetector::computeHSHistogram(
00216       cv::Mat &src, cv::Mat &hist, const int hBin, const int sBin, bool is_norm)
00217    {
00218       if (src.empty()) {
00219          return;
00220       }
00221       cv::Mat hsv;
00222       cv::cvtColor(src, hsv, CV_BGR2HSV);
00223       int histSize[] = {hBin, sBin};
00224       float h_ranges[] = {0, 180};
00225       float s_ranges[] = {0, 256};
00226       const float* ranges[] = {h_ranges, s_ranges};
00227       int channels[] = {0, 1};
00228       cv::calcHist(
00229          &hsv, 1, channels, cv::Mat(), hist, 2, histSize, ranges, true, false);
00230       if (is_norm) {
00231          cv::normalize(hist, hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());
00232       }
00233    }
00234    
00235    void SlidingWindowObjectDetector::pyramidialScaling(
00236       cv::Size &wsize, const float scale)
00237    {
00238       float nwidth = wsize.width + (wsize.width * scale);
00239       float nheight = wsize.height + (wsize.height * scale);
00240       const int min_swindow_size = 16;
00241       nwidth = (nwidth < min_swindow_size) ? min_swindow_size : nwidth;
00242       nheight = (nheight < min_swindow_size) ? min_swindow_size : nheight;
00243       wsize = cv::Size(std::abs(nwidth), std::abs(nheight));
00244    }
00245 
00246    std::vector<cv::Rect_<int> > SlidingWindowObjectDetector::nonMaximumSuppression(
00247       std::multimap<float, cv::Rect_<int> > &detection_info,
00248       const float nms_threshold)
00249    {
00250       if (detection_info.empty()) {
00251          return std::vector<cv::Rect_<int> >();
00252       }
00253       jsk_perception::NonMaximumSuppression srv_nms;
00254       std::vector<jsk_recognition_msgs::Rect> rect_msg;
00255       for (std::multimap<float, cv::Rect_<int> >::iterator
00256               it = detection_info.begin(); it != detection_info.end(); it++) {
00257          cv::Rect_<int> cv_rect = it->second;
00258          jsk_recognition_msgs::Rect jsk_rect;
00259          jsk_rect.x = cv_rect.x;
00260          jsk_rect.y = cv_rect.y;
00261          jsk_rect.width = cv_rect.width;
00262          jsk_rect.height = cv_rect.height;
00263          srv_nms.request.rect.push_back(jsk_rect);
00264       }
00265       srv_nms.request.threshold = nms_threshold;
00266       std::vector<cv::Rect_<int> > bbox;
00267       if (this->nms_client_.call(srv_nms)) {
00268          for (int i = 0; i < srv_nms.response.bbox_count; i++) {
00269             cv::Rect_<int> brect = cv::Rect_<int>(
00270                srv_nms.response.bbox[i].x,
00271                srv_nms.response.bbox[i].y,
00272                srv_nms.response.bbox[i].width,
00273                srv_nms.response.bbox[i].height);
00274             bbox.push_back(brect);
00275          }
00276       } else {
00277          ROS_ERROR("Failed to call NonMaximumSuppression Module");
00278          return std::vector<cv::Rect_<int> >();
00279       }
00280       return bbox;
00281    }
00282 
00283    void SlidingWindowObjectDetector::concatenateCVMat(
00284       const cv::Mat &mat_1, const cv::Mat &mat_2,
00285       cv::Mat &featureMD, bool iscolwise)
00286    {
00287       if (iscolwise) {
00288          featureMD = cv::Mat(mat_1.rows, (mat_1.cols + mat_2.cols), CV_32F);
00289          for (int i = 0; i < featureMD.rows; i++) {
00290             for (int j = 0; j < mat_1.cols; j++) {
00291                featureMD.at<float>(i, j) = mat_1.at<float>(i, j);
00292             }
00293             for (int j = mat_1.cols; j < featureMD.cols; j++) {
00294                featureMD.at<float>(i, j) = mat_2.at<float>(i, j - mat_1.cols);
00295             }
00296          }
00297       } else {
00298          featureMD = cv::Mat((mat_1.rows + mat_2.rows), mat_1.cols, CV_32F);
00299          for (int i = 0; i < featureMD.cols; i++) {
00300             for (int j = 0; j < mat_1.rows; j++) {
00301                featureMD.at<float>(j, i) = mat_1.at<float>(j, i);
00302             }
00303             for (int j = mat_1.rows; j < featureMD.rows; j++) {
00304                featureMD.at<float>(j, i) = mat_2.at<float>(j - mat_1.rows, i);
00305             }
00306          }
00307       }
00308    }
00309    
00310    void SlidingWindowObjectDetector::convertCvRectToJSKRectArray(
00311       const std::vector<cv::Rect_<int> > &bounding_boxes,
00312       jsk_recognition_msgs::RectArray &jsk_rects,
00313       const int downsize, const cv::Size img_sz)
00314    {
00315       for (std::vector<cv::Rect_<int> >::const_iterator it =
00316               bounding_boxes.begin(); it != bounding_boxes.end(); it++) {
00317          jsk_recognition_msgs::Rect j_r;
00318          j_r.x = it->x * downsize;
00319          j_r.y = it->y * downsize;
00320          j_r.width = it->width * downsize;
00321          j_r.height = it->height * downsize;
00322          jsk_rects.rects.push_back(j_r);
00323       }
00324    }
00325    
00326    void SlidingWindowObjectDetector::loadTrainedDetectorModel()
00327    {
00328       try {
00329          ROS_INFO("--Loading Trained SVM Classifier");
00330 #if CV_MAJOR_VERSION >= 3 // http://docs.opencv.org/master/d3/d46/classcv_1_1Algorithm.html
00331          this->supportVectorMachine_ = cv::ml::SVM::create();
00332          this->supportVectorMachine_ = cv::Algorithm::load<cv::ml::SVM>(this->model_name_);
00333 #else
00334          this->supportVectorMachine_ = boost::shared_ptr<cv::SVM>(new cv::SVM);
00335          this->supportVectorMachine_->load(this->model_name_.c_str());
00336 #endif
00337          ROS_INFO("--Classifier Loaded Successfully");
00338       } catch(cv::Exception &e) {
00339          ROS_ERROR("--ERROR: Fail to load Classifier \n%s", e.what());
00340          std::_Exit(EXIT_FAILURE);
00341       }
00342    }
00343    
00344    void SlidingWindowObjectDetector::readTrainingManifestFromDirectory()
00345    {
00346       cv::FileStorage fs = cv::FileStorage(
00347          this->trainer_manifest_filename_, cv::FileStorage::READ);
00348       if (!fs.isOpened()) {
00349          ROS_ERROR("TRAINER MANIFEST NOT FOUND..");
00350          std::_Exit(EXIT_FAILURE);
00351       }
00352       cv::FileNode n = fs["TrainerInfo"];
00353       std::string ttype = n["trainer_type"];
00354       std::string tpath = n["trainer_path"];
00355       this->model_name_ = tpath;   // classifier path
00356     
00357       n = fs["FeatureInfo"];       // features used
00358       int hog = static_cast<int>(n["HOG"]);
00359       int lbp = static_cast<int>(n["LBP"]);
00360 
00361       n = fs["SlidingWindowInfo"];  // window size
00362       int sw_x = static_cast<int>(n["swindow_x"]);
00363       int sw_y = static_cast<int>(n["swindow_y"]);
00364       this->swindow_x = sw_x;
00365       this->swindow_y = sw_y;
00366     
00367       n = fs["TrainingDatasetDirectoryInfo"];
00368       std::string pfile = n["object_dataset_filename"];
00369       std::string nfile = n["nonobject_dataset_filename"];
00370       std::string dataset_path = n["dataset_path"];
00371       this->object_dataset_filename_ = pfile;  // object dataset
00372       this->nonobject_dataset_filename_ = nfile;
00373       this->ndataset_path_ = dataset_path + nfile;  // ~/.ros/dir/negative/x.bag
00374    }
00375 
00376    void SlidingWindowObjectDetector::setBoundingBoxLabel(
00377       cv::Mat& im, cv::Rect_<int> rect, const std::string label)
00378    {
00379       int fontface = cv::FONT_HERSHEY_SIMPLEX;
00380       double scale = 0.2;
00381       int thickness = 1;
00382       int baseline = 0;
00383       cv::Size text = cv::getTextSize(
00384          label, fontface, scale, thickness, &baseline);
00385       cv::Point pt = cv::Point(
00386          rect.x + (rect.width-text.width), rect.y + (rect.height+text.height));
00387       cv::rectangle(im, pt + cv::Point(0, baseline),
00388                     pt + cv::Point(text.width, -text.height),
00389                     CV_RGB(0, 0, 255), CV_FILLED);
00390       cv::putText(im, label, pt,
00391                   fontface, scale, CV_RGB(255, 0, 0), thickness, 8);
00392    }
00393    
00394    void SlidingWindowObjectDetector::configCallback(
00395       jsk_perception::SlidingWindowObjectDetectorConfig &config, uint32_t level)
00396    {
00397       boost::mutex::scoped_lock lock(mutex_);
00398       this->scale_ = static_cast<float>(config.scaling_factor);
00399       this->stack_size_ = static_cast<int>(config.stack_size);
00400       this->incrementor_ = config.sliding_window_increment;
00401       this->downsize_ = config.image_downsize;
00402       
00403       // currently fixed variables
00404       // this->swindow_x = config.sliding_window_width;
00405       // this->swindow_y = config.sliding_window_height;
00406    }
00407 }  // namespace jsk_perception
00408 
00409 #include <pluginlib/class_list_macros.h>
00410 PLUGINLIB_EXPORT_CLASS(jsk_perception::SlidingWindowObjectDetector, nodelet::Nodelet);