00001
00002
00003 #include <jsk_perception/sliding_window_object_detector.h>
00004 #include <jsk_recognition_msgs/Rect.h>
00005 #include <jsk_perception/NonMaximumSuppression.h>
00006
00007 namespace jsk_perception
00008 {
00009 void SlidingWindowObjectDetector::onInit()
00010 {
00011 DiagnosticNodelet::onInit();
00012
00013 this->nms_client_ = pnh_->serviceClient<
00014 jsk_perception::NonMaximumSuppression>("non_maximum_suppression");
00015 this->srv_ = boost::make_shared<dynamic_reconfigure::Server<
00016 jsk_perception::SlidingWindowObjectDetectorConfig> >(*pnh_);
00017 dynamic_reconfigure::Server<
00018 jsk_perception::SlidingWindowObjectDetectorConfig>::CallbackType f =
00019 boost::bind(&SlidingWindowObjectDetector::configCallback, this, _1, _2);
00020 this->srv_->setCallback(f);
00021
00022 pnh_->getParam("run_type", this->run_type_);
00023 pnh_->getParam("trainer_manifest", this->trainer_manifest_filename_);
00024
00025 ROS_INFO("RUN TYPE: %s", run_type_.c_str());
00026 ROS_INFO("LOADED TRAINER MANIFEST: %s", trainer_manifest_filename_.c_str());
00027
00028 this->readTrainingManifestFromDirectory();
00029 loadTrainedDetectorModel();
00030 if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00031 try {
00032 std::string _topic = "/dataset/background/roi";
00033 boost::shared_ptr<rosbag::Bag> tmp_bag(new rosbag::Bag);
00034 tmp_bag->open(this->ndataset_path_, rosbag::bagmode::Read);
00035 ROS_INFO("Bag Found and Opened Successfully ...");
00036 std::vector<std::string> topics;
00037 topics.push_back(_topic);
00038 rosbag::View view(*tmp_bag, rosbag::TopicQuery(topics));
00039 std::vector<sensor_msgs::Image> tmp_imgs;
00040 BOOST_FOREACH(rosbag::MessageInstance const m, view) {
00041 sensor_msgs::Image::ConstPtr img_msg = m.instantiate<
00042 sensor_msgs::Image>();
00043 tmp_imgs.push_back(*img_msg);
00044 }
00045 tmp_bag->close();
00046 this->rosbag_ = boost::shared_ptr<rosbag::Bag>(new rosbag::Bag);
00047 this->rosbag_->open(this->ndataset_path_, rosbag::bagmode::Write);
00048 for (std::vector<sensor_msgs::Image>::iterator it = tmp_imgs.begin();
00049 it != tmp_imgs.end(); it++) {
00050 this->rosbag_->write(_topic, ros::Time::now(), *it);
00051 }
00052 } catch (ros::Exception &e) {
00053 ROS_ERROR("ERROR: Bag File not found..\n%s", e.what());
00054 std::_Exit(EXIT_FAILURE);
00055 }
00056 }
00057 this->pub_rects_ = advertise<jsk_recognition_msgs::RectArray>(
00058 *pnh_, "output/rects", 1);
00059 this->pub_image_ = advertise<sensor_msgs::Image>(
00060 *pnh_, "output/image", 1);
00061 }
00062
00063 void SlidingWindowObjectDetector::subscribe()
00064 {
00065 ROS_INFO("Subscribing...");
00066 this->sub_image_ = pnh_->subscribe(
00067 "input", 1, &SlidingWindowObjectDetector::imageCb, this);
00068 }
00069
00070 void SlidingWindowObjectDetector::unsubscribe()
00071 {
00072 NODELET_DEBUG("Unsubscribing from ROS topic.");
00073 this->sub_image_.shutdown();
00074 }
00075
00076 void SlidingWindowObjectDetector::imageCb(const sensor_msgs::ImageConstPtr& msg)
00077 {
00078 cv_bridge::CvImagePtr cv_ptr;
00079 try {
00080 cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
00081 } catch (cv_bridge::Exception& e) {
00082 ROS_ERROR("cv_bridge exception: %s", e.what());
00083 return;
00084 }
00085 cv::Mat image;
00086 cv::Size isize = cv_ptr->image.size();
00087
00088 const int downsize = this->downsize_;
00089 const float scale = this->scale_;
00090 const int img_stack = this->stack_size_;
00091 const int incrementor = this->incrementor_;
00092 cv::resize(cv_ptr->image, image, cv::Size(
00093 isize.width/downsize, isize.height/downsize));
00094 std::multimap<float, cv::Rect_<int> > detection_info =
00095 this->runSlidingWindowDetector(image, cv::Size(
00096 this->swindow_x, this->swindow_y),
00097 scale, img_stack, incrementor);
00098 cv::Mat dimg = image.clone();
00099 ROS_INFO("--Info Size: %ld", detection_info.size());
00100 for (std::multimap<float, cv::Rect_<int> >::iterator
00101 it = detection_info.begin(); it != detection_info.end(); it++) {
00102 cv::rectangle(dimg, it->second, cv::Scalar(0, 0, 255), 2);
00103 }
00104 if (this->run_type_.compare("DETECTOR") == 0) {
00105 const float nms_threshold = 0.01;
00106 std::vector<cv::Rect_<int> > object_rects = this->nonMaximumSuppression(
00107 detection_info, nms_threshold);
00108 cv::Mat bimg = image.clone();
00109 for (std::vector<cv::Rect_<int> >::iterator it = object_rects.begin();
00110 it != object_rects.end(); it++) {
00111 this->setBoundingBoxLabel(bimg, *it);
00112 cv::rectangle(bimg, *it, cv::Scalar(0, 255, 0), 1);
00113 }
00114 jsk_recognition_msgs::RectArray jsk_rect_array;
00115 this->convertCvRectToJSKRectArray(
00116 object_rects, jsk_rect_array, downsize, isize);
00117 jsk_rect_array.header = msg->header;
00118 cv_bridge::CvImagePtr out_msg(new cv_bridge::CvImage);
00119 out_msg->header = msg->header;
00120 out_msg->encoding = sensor_msgs::image_encodings::BGR8;
00121 out_msg->image = bimg.clone();
00122 this->pub_rects_.publish(jsk_rect_array);
00123 this->pub_image_.publish(out_msg->toImageMsg());
00124 } else if (this->run_type_.compare("BOOTSTRAPER") == 0) {
00125 for (std::multimap<float, cv::Rect_<int> >::const_iterator
00126 it = detection_info.begin(); it != detection_info.end(); it++) {
00127 cv::Mat roi = image(it->second).clone();
00128 cv::resize(
00129 roi, roi, cv::Size(
00130 roi.cols * this->downsize_, roi.rows * this->downsize_));
00131 if (roi.data) {
00132 ROS_INFO("Writing to bag file");
00133 cv_bridge::CvImagePtr write_roi(new cv_bridge::CvImage);
00134 write_roi->header = msg->header;
00135 write_roi->encoding = sensor_msgs::image_encodings::BGR8;
00136 write_roi->image = roi.clone();
00137 this->rosbag_->write("/dataset/background/roi",
00138 ros::Time::now(), write_roi->toImageMsg());
00139 cv::imshow("write_roi", roi);
00140 cv::waitKey(3);
00141 }
00142 }
00143 } else {
00144 this->pub_image_.publish(cv_ptr->toImageMsg());
00145 ROS_ERROR("NODELET RUNTYPE IS NOT SET.");
00146 std::_Exit(EXIT_FAILURE);
00147 }
00148 }
00149
00150 std::multimap<float, cv::Rect_<int> >
00151 SlidingWindowObjectDetector::runSlidingWindowDetector(
00152 const cv::Mat &image, const cv::Size wsize, const float scale,
00153 const int scale_counter, const int incrementor)
00154 {
00155 if (image.empty()) {
00156 ROS_ERROR("--INPUT IMAGE IS EMPTY");
00157 return std::multimap<float, cv::Rect_<int> >();
00158 }
00159 cv::Size nwsize = wsize;
00160 int scounter = 0;
00161 std::multimap<float, cv::Rect_<int> > detection_info;
00162 int sw_incrementor = incrementor;
00163 while (scounter++ < scale_counter) {
00164 this->objectRecognizer(image, detection_info, nwsize, sw_incrementor);
00165 this->pyramidialScaling(nwsize, scale);
00166 sw_incrementor += (sw_incrementor * scale);
00167 }
00168 return detection_info;
00169 }
00170
00171 void SlidingWindowObjectDetector::objectRecognizer(
00172 const cv::Mat &image, std::multimap<float, cv::Rect_<int> > &detection_info,
00173 const cv::Size wsize, const int incrementor)
00174 {
00175 for (int j = 0; j < image.rows; j += incrementor) {
00176 for (int i = 0; i < image.cols; i += incrementor) {
00177 cv::Rect_<int> rect = cv::Rect_<int>(i, j, wsize.width, wsize.height);
00178 if ((rect.x + rect.width <= image.cols) &&
00179 (rect.y + rect.height <= image.rows)) {
00180 cv::Mat roi = image(rect).clone();
00181 cv::GaussianBlur(roi, roi, cv::Size(3, 3), 1.0);
00182 cv::resize(roi, roi, cv::Size(this->swindow_x, this->swindow_y));
00183 cv::Mat hog_feature = this->computeHOG(roi);
00184 cv::Mat hsv_feature;
00185 this->computeHSHistogram(roi, hsv_feature, 16, 16, true);
00186 hsv_feature = hsv_feature.reshape(1, 1);
00187 cv::Mat _feature = hog_feature;
00188 this->concatenateCVMat(hog_feature, hsv_feature, _feature);
00189 #if CV_MAJOR_VERSION >= 3
00190 cv::Mat _ret;
00191 float response = this->supportVectorMachine_->predict(
00192 _feature, _ret, false);
00193 #else
00194 float response = this->supportVectorMachine_->predict(
00195 _feature, false);
00196 #endif
00197 if (response == 1) {
00198 detection_info.insert(std::make_pair(response, rect));
00199 } else {
00200 continue;
00201 }
00202 }
00203 }
00204 }
00205 }
00206
00210 void SlidingWindowObjectDetector::computeHSHistogram(
00211 cv::Mat &src, cv::Mat &hist, const int hBin, const int sBin, bool is_norm)
00212 {
00213 if (src.empty()) {
00214 return;
00215 }
00216 cv::Mat hsv;
00217 cv::cvtColor(src, hsv, CV_BGR2HSV);
00218 int histSize[] = {hBin, sBin};
00219 float h_ranges[] = {0, 180};
00220 float s_ranges[] = {0, 256};
00221 const float* ranges[] = {h_ranges, s_ranges};
00222 int channels[] = {0, 1};
00223 cv::calcHist(
00224 &hsv, 1, channels, cv::Mat(), hist, 2, histSize, ranges, true, false);
00225 if (is_norm) {
00226 cv::normalize(hist, hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());
00227 }
00228 }
00229
00230 void SlidingWindowObjectDetector::pyramidialScaling(
00231 cv::Size &wsize, const float scale)
00232 {
00233 float nwidth = wsize.width + (wsize.width * scale);
00234 float nheight = wsize.height + (wsize.height * scale);
00235 const int min_swindow_size = 16;
00236 nwidth = (nwidth < min_swindow_size) ? min_swindow_size : nwidth;
00237 nheight = (nheight < min_swindow_size) ? min_swindow_size : nheight;
00238 wsize = cv::Size(std::abs(nwidth), std::abs(nheight));
00239 }
00240
00241 std::vector<cv::Rect_<int> > SlidingWindowObjectDetector::nonMaximumSuppression(
00242 std::multimap<float, cv::Rect_<int> > &detection_info,
00243 const float nms_threshold)
00244 {
00245 if (detection_info.empty()) {
00246 return std::vector<cv::Rect_<int> >();
00247 }
00248 jsk_perception::NonMaximumSuppression srv_nms;
00249 std::vector<jsk_recognition_msgs::Rect> rect_msg;
00250 for (std::multimap<float, cv::Rect_<int> >::iterator
00251 it = detection_info.begin(); it != detection_info.end(); it++) {
00252 cv::Rect_<int> cv_rect = it->second;
00253 jsk_recognition_msgs::Rect jsk_rect;
00254 jsk_rect.x = cv_rect.x;
00255 jsk_rect.y = cv_rect.y;
00256 jsk_rect.width = cv_rect.width;
00257 jsk_rect.height = cv_rect.height;
00258 srv_nms.request.rect.push_back(jsk_rect);
00259 }
00260 srv_nms.request.threshold = nms_threshold;
00261 std::vector<cv::Rect_<int> > bbox;
00262 if (this->nms_client_.call(srv_nms)) {
00263 for (int i = 0; i < srv_nms.response.bbox_count; i++) {
00264 cv::Rect_<int> brect = cv::Rect_<int>(
00265 srv_nms.response.bbox[i].x,
00266 srv_nms.response.bbox[i].y,
00267 srv_nms.response.bbox[i].width,
00268 srv_nms.response.bbox[i].height);
00269 bbox.push_back(brect);
00270 }
00271 } else {
00272 ROS_ERROR("Failed to call NonMaximumSuppression Module");
00273 return std::vector<cv::Rect_<int> >();
00274 }
00275 return bbox;
00276 }
00277
00278 void SlidingWindowObjectDetector::concatenateCVMat(
00279 const cv::Mat &mat_1, const cv::Mat &mat_2,
00280 cv::Mat &featureMD, bool iscolwise)
00281 {
00282 if (iscolwise) {
00283 featureMD = cv::Mat(mat_1.rows, (mat_1.cols + mat_2.cols), CV_32F);
00284 for (int i = 0; i < featureMD.rows; i++) {
00285 for (int j = 0; j < mat_1.cols; j++) {
00286 featureMD.at<float>(i, j) = mat_1.at<float>(i, j);
00287 }
00288 for (int j = mat_1.cols; j < featureMD.cols; j++) {
00289 featureMD.at<float>(i, j) = mat_2.at<float>(i, j - mat_1.cols);
00290 }
00291 }
00292 } else {
00293 featureMD = cv::Mat((mat_1.rows + mat_2.rows), mat_1.cols, CV_32F);
00294 for (int i = 0; i < featureMD.cols; i++) {
00295 for (int j = 0; j < mat_1.rows; j++) {
00296 featureMD.at<float>(j, i) = mat_1.at<float>(j, i);
00297 }
00298 for (int j = mat_1.rows; j < featureMD.rows; j++) {
00299 featureMD.at<float>(j, i) = mat_2.at<float>(j - mat_1.rows, i);
00300 }
00301 }
00302 }
00303 }
00304
00305 void SlidingWindowObjectDetector::convertCvRectToJSKRectArray(
00306 const std::vector<cv::Rect_<int> > &bounding_boxes,
00307 jsk_recognition_msgs::RectArray &jsk_rects,
00308 const int downsize, const cv::Size img_sz)
00309 {
00310 for (std::vector<cv::Rect_<int> >::const_iterator it =
00311 bounding_boxes.begin(); it != bounding_boxes.end(); it++) {
00312 jsk_recognition_msgs::Rect j_r;
00313 j_r.x = it->x * downsize;
00314 j_r.y = it->y * downsize;
00315 j_r.width = it->width * downsize;
00316 j_r.height = it->height * downsize;
00317 jsk_rects.rects.push_back(j_r);
00318 }
00319 }
00320
00321 void SlidingWindowObjectDetector::loadTrainedDetectorModel()
00322 {
00323 try {
00324 ROS_INFO("--Loading Trained SVM Classifier");
00325 #if CV_MAJOR_VERSION >= 3 // http://docs.opencv.org/master/d3/d46/classcv_1_1Algorithm.html
00326 this->supportVectorMachine_ = cv::ml::SVM::create();
00327 this->supportVectorMachine_ = cv::Algorithm::load<cv::ml::SVM>(this->model_name_);
00328 #else
00329 this->supportVectorMachine_ = boost::shared_ptr<cv::SVM>(new cv::SVM);
00330 this->supportVectorMachine_->load(this->model_name_.c_str());
00331 #endif
00332 ROS_INFO("--Classifier Loaded Successfully");
00333 } catch(cv::Exception &e) {
00334 ROS_ERROR("--ERROR: Fail to load Classifier \n%s", e.what());
00335 std::_Exit(EXIT_FAILURE);
00336 }
00337 }
00338
00339 void SlidingWindowObjectDetector::readTrainingManifestFromDirectory()
00340 {
00341 cv::FileStorage fs = cv::FileStorage(
00342 this->trainer_manifest_filename_, cv::FileStorage::READ);
00343 if (!fs.isOpened()) {
00344 ROS_ERROR("TRAINER MANIFEST NOT FOUND..");
00345 std::_Exit(EXIT_FAILURE);
00346 }
00347 cv::FileNode n = fs["TrainerInfo"];
00348 std::string ttype = n["trainer_type"];
00349 std::string tpath = n["trainer_path"];
00350 this->model_name_ = tpath;
00351
00352 n = fs["FeatureInfo"];
00353 int hog = static_cast<int>(n["HOG"]);
00354 int lbp = static_cast<int>(n["LBP"]);
00355
00356 n = fs["SlidingWindowInfo"];
00357 int sw_x = static_cast<int>(n["swindow_x"]);
00358 int sw_y = static_cast<int>(n["swindow_y"]);
00359 this->swindow_x = sw_x;
00360 this->swindow_y = sw_y;
00361
00362 n = fs["TrainingDatasetDirectoryInfo"];
00363 std::string pfile = n["object_dataset_filename"];
00364 std::string nfile = n["nonobject_dataset_filename"];
00365 std::string dataset_path = n["dataset_path"];
00366 this->object_dataset_filename_ = pfile;
00367 this->nonobject_dataset_filename_ = nfile;
00368 this->ndataset_path_ = dataset_path + nfile;
00369 }
00370
00371 void SlidingWindowObjectDetector::setBoundingBoxLabel(
00372 cv::Mat& im, cv::Rect_<int> rect, const std::string label)
00373 {
00374 int fontface = cv::FONT_HERSHEY_SIMPLEX;
00375 double scale = 0.2;
00376 int thickness = 1;
00377 int baseline = 0;
00378 cv::Size text = cv::getTextSize(
00379 label, fontface, scale, thickness, &baseline);
00380 cv::Point pt = cv::Point(
00381 rect.x + (rect.width-text.width), rect.y + (rect.height+text.height));
00382 cv::rectangle(im, pt + cv::Point(0, baseline),
00383 pt + cv::Point(text.width, -text.height),
00384 CV_RGB(0, 0, 255), CV_FILLED);
00385 cv::putText(im, label, pt,
00386 fontface, scale, CV_RGB(255, 0, 0), thickness, 8);
00387 }
00388
00389 void SlidingWindowObjectDetector::configCallback(
00390 jsk_perception::SlidingWindowObjectDetectorConfig &config, uint32_t level)
00391 {
00392 boost::mutex::scoped_lock lock(mutex_);
00393 this->scale_ = static_cast<float>(config.scaling_factor);
00394 this->stack_size_ = static_cast<int>(config.stack_size);
00395 this->incrementor_ = config.sliding_window_increment;
00396 this->downsize_ = config.image_downsize;
00397
00398
00399
00400
00401 }
00402 }
00403
00404 #include <pluginlib/class_list_macros.h>
00405 PLUGINLIB_EXPORT_CLASS(jsk_perception::SlidingWindowObjectDetector, nodelet::Nodelet);