jsk_perception: sliding_window_object_detector.cpp Source File

Go to the documentation of this file.
00001 // @author Krishneel Chaudhary, JSK
00002 
00003 #include <jsk_perception/sliding_window_object_detector.h>
00004 #include <jsk_recognition_msgs/Rect.h>
00005 #include <jsk_perception/NonMaximumSuppression.h>
00006 
00007 namespace jsk_perception
00008 {
00009    void SlidingWindowObjectDetector::onInit()
00010    {
00011       DiagnosticNodelet::onInit();
00012 
00013       this->nms_client_ = pnh_->serviceClient<
00014          jsk_perception::NonMaximumSuppression>("non_maximum_suppression");
00015       this->srv_ = boost::make_shared<dynamic_reconfigure::Server<
00016          jsk_perception::SlidingWindowObjectDetectorConfig> >(*pnh_);
00017       dynamic_reconfigure::Server<
00018          jsk_perception::SlidingWindowObjectDetectorConfig>::CallbackType f =
00019          boost::bind(&SlidingWindowObjectDetector::configCallback, this, _1, _2);
00020       this->srv_->setCallback(f);
00021       
00022       pnh_->getParam("run_type", this->run_type_);
00023       pnh_->getParam("trainer_manifest", this->trainer_manifest_filename_);
00024 
00025       ROS_INFO("RUN TYPE: %s", run_type_.c_str());
00026       ROS_INFO("LOADED TRAINER MANIFEST: %s", trainer_manifest_filename_.c_str());
00027 
00028       this->readTrainingManifestFromDirectory();
00029       loadTrainedDetectorModel();
00030       if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00031          try {
00032             std::string _topic = "/dataset/background/roi";
00033             boost::shared_ptr<rosbag::Bag> tmp_bag(new rosbag::Bag);
00034             tmp_bag->open(this->ndataset_path_, rosbag::bagmode::Read);
00035             ROS_INFO("Bag Found and Opened Successfully ...");
00036             std::vector<std::string> topics;
00037             topics.push_back(_topic);
00038             rosbag::View view(*tmp_bag, rosbag::TopicQuery(topics));
00039             std::vector<sensor_msgs::Image> tmp_imgs;
00040             BOOST_FOREACH(rosbag::MessageInstance const m, view) {
00041                sensor_msgs::Image::ConstPtr img_msg = m.instantiate<
00042                sensor_msgs::Image>();
00043                tmp_imgs.push_back(*img_msg);
00044             }
00045             tmp_bag->close();
00046             this->rosbag_ = boost::shared_ptr<rosbag::Bag>(new rosbag::Bag);
00047             this->rosbag_->open(this->ndataset_path_, rosbag::bagmode::Write);
00048             for (std::vector<sensor_msgs::Image>::iterator it = tmp_imgs.begin();
00049                  it != tmp_imgs.end(); it++) {
00050                this->rosbag_->write(_topic, ros::Time::now(), *it);
00051             }
00052          } catch (ros::Exception &e) {
00053             ROS_ERROR("ERROR: Bag File not found..\n%s", e.what());
00054             std::_Exit(EXIT_FAILURE);
00055          }         
00056       }
00057       this->pub_rects_ = advertise<jsk_recognition_msgs::RectArray>(
00058          *pnh_, "output/rects", 1);
00059       this->pub_image_ = advertise<sensor_msgs::Image>(
00060          *pnh_, "output/image", 1);
00061    }
00062 
00063    void SlidingWindowObjectDetector::subscribe()
00064    {
00065       ROS_INFO("Subscribing...");
00066       this->sub_image_ = pnh_->subscribe(
00067          "input", 1, &SlidingWindowObjectDetector::imageCb, this);
00068    }
00069    
00070    void SlidingWindowObjectDetector::unsubscribe()
00071    {
00072       NODELET_DEBUG("Unsubscribing from ROS topic.");
00073       this->sub_image_.shutdown();
00074    }
00075    
00076    void SlidingWindowObjectDetector::imageCb(const sensor_msgs::ImageConstPtr& msg)
00077    {
00078       cv_bridge::CvImagePtr cv_ptr;
00079       try {
00080          cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
00081       } catch (cv_bridge::Exception& e) {
00082          ROS_ERROR("cv_bridge exception: %s", e.what());
00083          return;
00084       }      
00085       cv::Mat image;
00086       cv::Size isize = cv_ptr->image.size();
00087       // control params
00088       const int downsize = this->downsize_;
00089       const float scale = this->scale_;
00090       const int img_stack = this->stack_size_;
00091       const int incrementor = this->incrementor_;
00092       cv::resize(cv_ptr->image, image, cv::Size(
00093                     isize.width/downsize, isize.height/downsize));
00094       std::multimap<float, cv::Rect_<int> > detection_info =
00095          this->runSlidingWindowDetector(image, cv::Size(
00096                                       this->swindow_x, this->swindow_y),
00097                                    scale, img_stack, incrementor);
00098       cv::Mat dimg = image.clone();
00099       ROS_INFO("--Info Size: %ld", detection_info.size());
00100       for (std::multimap<float, cv::Rect_<int> >::iterator
00101               it = detection_info.begin(); it != detection_info.end(); it++) {
00102          cv::rectangle(dimg, it->second, cv::Scalar(0, 0, 255), 2);
00103       }
00104       if (this->run_type_.compare("DETECTOR") == 0) {
00105          const float nms_threshold = 0.01;
00106          std::vector<cv::Rect_<int> > object_rects = this->nonMaximumSuppression(
00107             detection_info, nms_threshold);
00108          cv::Mat bimg = image.clone();
00109          for (std::vector<cv::Rect_<int> >::iterator it = object_rects.begin();
00110               it != object_rects.end(); it++) {
00111             this->setBoundingBoxLabel(bimg, *it);
00112             cv::rectangle(bimg, *it, cv::Scalar(0, 255, 0), 1);
00113          }
00114          jsk_recognition_msgs::RectArray jsk_rect_array;
00115          this->convertCvRectToJSKRectArray(
00116             object_rects, jsk_rect_array, downsize, isize);
00117          jsk_rect_array.header = msg->header;
00118          cv_bridge::CvImagePtr out_msg(new cv_bridge::CvImage);
00119          out_msg->header = msg->header;
00120          out_msg->encoding = sensor_msgs::image_encodings::BGR8;
00121          out_msg->image = bimg.clone();
00122          this->pub_rects_.publish(jsk_rect_array);
00123          this->pub_image_.publish(out_msg->toImageMsg());
00124       } else if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00125          for (std::multimap<float, cv::Rect_<int> >::const_iterator
00126                  it = detection_info.begin(); it != detection_info.end(); it++) {
00127             cv::Mat roi = image(it->second).clone();
00128             cv::resize(
00129                roi, roi, cv::Size(
00130                   roi.cols * this->downsize_, roi.rows * this->downsize_));
00131             if (roi.data) {
00132                ROS_INFO("Writing to bag file");
00133                cv_bridge::CvImagePtr write_roi(new cv_bridge::CvImage);
00134                write_roi->header = msg->header;
00135                write_roi->encoding = sensor_msgs::image_encodings::BGR8;
00136                write_roi->image = roi.clone();
00137                this->rosbag_->write("/dataset/background/roi",
00138                                     ros::Time::now(), write_roi->toImageMsg());
00139                cv::imshow("write_roi", roi);
00140                cv::waitKey(3);
00141             }
00142          }
00143       } else {
00144          this->pub_image_.publish(cv_ptr->toImageMsg());
00145          ROS_ERROR("NODELET RUNTYPE IS NOT SET.");
00146          std::_Exit(EXIT_FAILURE);
00147       }
00148    }
00149 
00150    std::multimap<float, cv::Rect_<int> >
00151    SlidingWindowObjectDetector::runSlidingWindowDetector(
00152       const cv::Mat &image, const cv::Size wsize, const float scale,
00153       const int scale_counter, const int incrementor)
00154    {
00155       if (image.empty()) {
00156          ROS_ERROR("--INPUT IMAGE IS EMPTY");
00157          return std::multimap<float, cv::Rect_<int> >();
00158       }
00159       cv::Size nwsize = wsize;
00160       int scounter = 0;
00161       std::multimap<float, cv::Rect_<int> > detection_info;
00162       int sw_incrementor = incrementor;
00163       while (scounter++ < scale_counter) {
00164          this->objectRecognizer(image, detection_info, nwsize, sw_incrementor);
00165          this->pyramidialScaling(nwsize, scale);
00166          sw_incrementor += (sw_incrementor * scale);
00167       }
00168       return detection_info;
00169    }
00170 
00171    void SlidingWindowObjectDetector::objectRecognizer(
00172       const cv::Mat &image, std::multimap<float, cv::Rect_<int> > &detection_info,
00173       const cv::Size wsize, const int incrementor)
00174    {
00175       for (int j = 0; j < image.rows; j += incrementor) {
00176          for (int i = 0; i < image.cols; i += incrementor) {
00177             cv::Rect_<int> rect = cv::Rect_<int>(i, j, wsize.width, wsize.height);
00178             if ((rect.x + rect.width <= image.cols) &&
00179                 (rect.y + rect.height <= image.rows)) {
00180                cv::Mat roi = image(rect).clone();
00181                cv::GaussianBlur(roi, roi, cv::Size(3, 3), 1.0);
00182                cv::resize(roi, roi, cv::Size(this->swindow_x, this->swindow_y));
00183                cv::Mat hog_feature = this->computeHOG(roi);
00184                cv::Mat hsv_feature;
00185                this->computeHSHistogram(roi, hsv_feature, 16, 16, true);
00186                hsv_feature = hsv_feature.reshape(1, 1);
00187                cv::Mat _feature = hog_feature;
00188                this->concatenateCVMat(hog_feature, hsv_feature, _feature);
00189 #if CV_MAJOR_VERSION >= 3
00190                cv::Mat _ret;
00191                float response = this->supportVectorMachine_->predict(
00192                                                                      _feature, _ret, false);
00193 #else
00194                float response = this->supportVectorMachine_->predict(
00195                   _feature, false);
00196 #endif
00197                if (response == 1) {
00198                   detection_info.insert(std::make_pair(response, rect));
00199                } else {
00200                   continue;
00201                }
00202             }
00203          }
00204       }
00205    }
00206 
00210    void SlidingWindowObjectDetector::computeHSHistogram(
00211       cv::Mat &src, cv::Mat &hist, const int hBin, const int sBin, bool is_norm)
00212    {
00213       if (src.empty()) {
00214          return;
00215       }
00216       cv::Mat hsv;
00217       cv::cvtColor(src, hsv, CV_BGR2HSV);
00218       int histSize[] = {hBin, sBin};
00219       float h_ranges[] = {0, 180};
00220       float s_ranges[] = {0, 256};
00221       const float* ranges[] = {h_ranges, s_ranges};
00222       int channels[] = {0, 1};
00223       cv::calcHist(
00224          &hsv, 1, channels, cv::Mat(), hist, 2, histSize, ranges, true, false);
00225       if (is_norm) {
00226          cv::normalize(hist, hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());
00227       }
00228    }
00229    
00230    void SlidingWindowObjectDetector::pyramidialScaling(
00231       cv::Size &wsize, const float scale)
00232    {
00233       float nwidth = wsize.width + (wsize.width * scale);
00234       float nheight = wsize.height + (wsize.height * scale);
00235       const int min_swindow_size = 16;
00236       nwidth = (nwidth < min_swindow_size) ? min_swindow_size : nwidth;
00237       nheight = (nheight < min_swindow_size) ? min_swindow_size : nheight;
00238       wsize = cv::Size(std::abs(nwidth), std::abs(nheight));
00239    }
00240 
00241    std::vector<cv::Rect_<int> > SlidingWindowObjectDetector::nonMaximumSuppression(
00242       std::multimap<float, cv::Rect_<int> > &detection_info,
00243       const float nms_threshold)
00244    {
00245       if (detection_info.empty()) {
00246          return std::vector<cv::Rect_<int> >();
00247       }
00248       jsk_perception::NonMaximumSuppression srv_nms;
00249       std::vector<jsk_recognition_msgs::Rect> rect_msg;
00250       for (std::multimap<float, cv::Rect_<int> >::iterator
00251               it = detection_info.begin(); it != detection_info.end(); it++) {
00252          cv::Rect_<int> cv_rect = it->second;
00253          jsk_recognition_msgs::Rect jsk_rect;
00254          jsk_rect.x = cv_rect.x;
00255          jsk_rect.y = cv_rect.y;
00256          jsk_rect.width = cv_rect.width;
00257          jsk_rect.height = cv_rect.height;
00258          srv_nms.request.rect.push_back(jsk_rect);
00259       }
00260       srv_nms.request.threshold = nms_threshold;
00261       std::vector<cv::Rect_<int> > bbox;
00262       if (this->nms_client_.call(srv_nms)) {
00263          for (int i = 0; i < srv_nms.response.bbox_count; i++) {
00264             cv::Rect_<int> brect = cv::Rect_<int>(
00265                srv_nms.response.bbox[i].x,
00266                srv_nms.response.bbox[i].y,
00267                srv_nms.response.bbox[i].width,
00268                srv_nms.response.bbox[i].height);
00269             bbox.push_back(brect);
00270          }
00271       } else {
00272          ROS_ERROR("Failed to call NonMaximumSuppression Module");
00273          return std::vector<cv::Rect_<int> >();
00274       }
00275       return bbox;
00276    }
00277 
00278    void SlidingWindowObjectDetector::concatenateCVMat(
00279       const cv::Mat &mat_1, const cv::Mat &mat_2,
00280       cv::Mat &featureMD, bool iscolwise)
00281    {
00282       if (iscolwise) {
00283          featureMD = cv::Mat(mat_1.rows, (mat_1.cols + mat_2.cols), CV_32F);
00284          for (int i = 0; i < featureMD.rows; i++) {
00285             for (int j = 0; j < mat_1.cols; j++) {
00286                featureMD.at<float>(i, j) = mat_1.at<float>(i, j);
00287             }
00288             for (int j = mat_1.cols; j < featureMD.cols; j++) {
00289                featureMD.at<float>(i, j) = mat_2.at<float>(i, j - mat_1.cols);
00290             }
00291          }
00292       } else {
00293          featureMD = cv::Mat((mat_1.rows + mat_2.rows), mat_1.cols, CV_32F);
00294          for (int i = 0; i < featureMD.cols; i++) {
00295             for (int j = 0; j < mat_1.rows; j++) {
00296                featureMD.at<float>(j, i) = mat_1.at<float>(j, i);
00297             }
00298             for (int j = mat_1.rows; j < featureMD.rows; j++) {
00299                featureMD.at<float>(j, i) = mat_2.at<float>(j - mat_1.rows, i);
00300             }
00301          }
00302       }
00303    }
00304    
00305    void SlidingWindowObjectDetector::convertCvRectToJSKRectArray(
00306       const std::vector<cv::Rect_<int> > &bounding_boxes,
00307       jsk_recognition_msgs::RectArray &jsk_rects,
00308       const int downsize, const cv::Size img_sz)
00309    {
00310       for (std::vector<cv::Rect_<int> >::const_iterator it =
00311               bounding_boxes.begin(); it != bounding_boxes.end(); it++) {
00312          jsk_recognition_msgs::Rect j_r;
00313          j_r.x = it->x * downsize;
00314          j_r.y = it->y * downsize;
00315          j_r.width = it->width * downsize;
00316          j_r.height = it->height * downsize;
00317          jsk_rects.rects.push_back(j_r);
00318       }
00319    }
00320    
00321    void SlidingWindowObjectDetector::loadTrainedDetectorModel()
00322    {
00323       try {
00324          ROS_INFO("--Loading Trained SVM Classifier");
00325 #if CV_MAJOR_VERSION >= 3 // http://docs.opencv.org/master/d3/d46/classcv_1_1Algorithm.html
00326          this->supportVectorMachine_ = cv::ml::SVM::create();
00327          this->supportVectorMachine_ = cv::Algorithm::load<cv::ml::SVM>(this->model_name_);
00328 #else
00329          this->supportVectorMachine_ = boost::shared_ptr<cv::SVM>(new cv::SVM);
00330          this->supportVectorMachine_->load(this->model_name_.c_str());
00331 #endif
00332          ROS_INFO("--Classifier Loaded Successfully");
00333       } catch(cv::Exception &e) {
00334          ROS_ERROR("--ERROR: Fail to load Classifier \n%s", e.what());
00335          std::_Exit(EXIT_FAILURE);
00336       }
00337    }
00338    
00339    void SlidingWindowObjectDetector::readTrainingManifestFromDirectory()
00340    {
00341       cv::FileStorage fs = cv::FileStorage(
00342          this->trainer_manifest_filename_, cv::FileStorage::READ);
00343       if (!fs.isOpened()) {
00344          ROS_ERROR("TRAINER MANIFEST NOT FOUND..");
00345          std::_Exit(EXIT_FAILURE);
00346       }
00347       cv::FileNode n = fs["TrainerInfo"];
00348       std::string ttype = n["trainer_type"];
00349       std::string tpath = n["trainer_path"];
00350       this->model_name_ = tpath;   // classifier path
00351     
00352       n = fs["FeatureInfo"];       // features used
00353       int hog = static_cast<int>(n["HOG"]);
00354       int lbp = static_cast<int>(n["LBP"]);
00355 
00356       n = fs["SlidingWindowInfo"];  // window size
00357       int sw_x = static_cast<int>(n["swindow_x"]);
00358       int sw_y = static_cast<int>(n["swindow_y"]);
00359       this->swindow_x = sw_x;
00360       this->swindow_y = sw_y;
00361     
00362       n = fs["TrainingDatasetDirectoryInfo"];
00363       std::string pfile = n["object_dataset_filename"];
00364       std::string nfile = n["nonobject_dataset_filename"];
00365       std::string dataset_path = n["dataset_path"];
00366       this->object_dataset_filename_ = pfile;  // object dataset
00367       this->nonobject_dataset_filename_ = nfile;
00368       this->ndataset_path_ = dataset_path + nfile;  // ~/.ros/dir/negative/x.bag
00369    }
00370 
00371    void SlidingWindowObjectDetector::setBoundingBoxLabel(
00372       cv::Mat& im, cv::Rect_<int> rect, const std::string label)
00373    {
00374       int fontface = cv::FONT_HERSHEY_SIMPLEX;
00375       double scale = 0.2;
00376       int thickness = 1;
00377       int baseline = 0;
00378       cv::Size text = cv::getTextSize(
00379          label, fontface, scale, thickness, &baseline);
00380       cv::Point pt = cv::Point(
00381          rect.x + (rect.width-text.width), rect.y + (rect.height+text.height));
00382       cv::rectangle(im, pt + cv::Point(0, baseline),
00383                     pt + cv::Point(text.width, -text.height),
00384                     CV_RGB(0, 0, 255), CV_FILLED);
00385       cv::putText(im, label, pt,
00386                   fontface, scale, CV_RGB(255, 0, 0), thickness, 8);
00387    }
00388    
00389    void SlidingWindowObjectDetector::configCallback(
00390       jsk_perception::SlidingWindowObjectDetectorConfig &config, uint32_t level)
00391    {
00392       boost::mutex::scoped_lock lock(mutex_);
00393       this->scale_ = static_cast<float>(config.scaling_factor);
00394       this->stack_size_ = static_cast<int>(config.stack_size);
00395       this->incrementor_ = config.sliding_window_increment;
00396       this->downsize_ = config.image_downsize;
00397       
00398       // currently fixed variables
00399       // this->swindow_x = config.sliding_window_width;
00400       // this->swindow_y = config.sliding_window_height;
00401    }
00402 }  // namespace jsk_perception
00403 
00404 #include <pluginlib/class_list_macros.h>
00405 PLUGINLIB_EXPORT_CLASS(jsk_perception::SlidingWindowObjectDetector, nodelet::Nodelet);