00001
00060
00061
00062
00063 #include <cob_people_detection/detection_tracker_node.h>
00064 #include <munkres/munkres.h>
00065
00066
00067
00068
00069
00070 #include <ros/ros.h>
00071 #include <ros/package.h>
00072
00073
00074 #include <sensor_msgs/Image.h>
00075 #include <cob_perception_msgs/DetectionArray.h>
00076
00077
00078
00079
00080
00081 #include <message_filters/subscriber.h>
00082 #include <message_filters/synchronizer.h>
00083 #include <message_filters/sync_policies/approximate_time.h>
00084 #include <image_transport/image_transport.h>
00085 #include <image_transport/subscriber_filter.h>
00086
00087
00088 #include <opencv2/opencv.hpp>
00089 #include <cv_bridge/cv_bridge.h>
00090 #include <sensor_msgs/image_encodings.h>
00091
00092
00093 #include <boost/bind.hpp>
00094 #include <boost/thread/mutex.hpp>
00095
00096
00097 #include "cob_vision_utils/GlobalDefines.h"
00098
00099
00100 #include "cob_people_detection/timer.h"
00101
00102 #include <sstream>
00103 #include <string>
00104 #include <vector>
00105
00106 using namespace ipa_PeopleDetector;
00107
00108 DetectionTrackerNode::DetectionTrackerNode(ros::NodeHandle nh) :
00109 node_handle_(nh)
00110 {
00111 it_ = 0;
00112 sync_input_2_ = 0;
00113
00114
00115 std::cout << "\n---------------------------\nPeople Detection Parameters:\n---------------------------\n";
00116 node_handle_.param("debug", debug_, false);
00117 std::cout << "debug = " << debug_ << "\n";
00118 node_handle_.param("rosbag_mode", rosbag_mode_, false);
00119 std::cout << "use rosbag mode: " << rosbag_mode_ << "\n";
00120 node_handle_.param("use_people_segmentation", use_people_segmentation_, true);
00121 std::cout << "use_people_segmentation = " << use_people_segmentation_ << "\n";
00122 node_handle_.param("face_redetection_time", face_redetection_time_, 2.0);
00123 std::cout << "face_redetection_time = " << face_redetection_time_ << "\n";
00124 double publish_currently_not_visible_detections_timespan = face_redetection_time_;
00125 node_handle_.param("publish_currently_not_visible_detections_timespan", publish_currently_not_visible_detections_timespan, face_redetection_time_);
00126 std::cout << "publish_currently_not_visible_detections_timespan = " << publish_currently_not_visible_detections_timespan << "\n";
00127 publish_currently_not_visible_detections_timespan_ = ros::Duration(publish_currently_not_visible_detections_timespan);
00128 node_handle_.param("min_segmented_people_ratio_face", min_segmented_people_ratio_face_, 0.7);
00129 std::cout << "min_segmented_people_ratio_face = " << min_segmented_people_ratio_face_ << "\n";
00130 node_handle_.param("min_segmented_people_ratio_head", min_segmented_people_ratio_head_, 0.2);
00131 std::cout << "min_segmented_people_ratio_head = " << min_segmented_people_ratio_head_ << "\n";
00132 node_handle_.param("tracking_range_m", tracking_range_m_, 0.3);
00133 std::cout << "tracking_range_m = " << tracking_range_m_ << "\n";
00134 node_handle_.param("face_identification_score_decay_rate", face_identification_score_decay_rate_, 0.9);
00135 std::cout << "face_identification_score_decay_rate = " << face_identification_score_decay_rate_ << "\n";
00136 node_handle_.param("min_face_identification_score_to_publish", min_face_identification_score_to_publish_, 0.9);
00137 std::cout << "min_face_identification_score_to_publish = " << min_face_identification_score_to_publish_ << "\n";
00138 node_handle_.param("fall_back_to_unknown_identification", fall_back_to_unknown_identification_, true);
00139 std::cout << "fall_back_to_unknown_identification = " << fall_back_to_unknown_identification_ << "\n";
00140 node_handle_.param("display_timing", display_timing_, false);
00141 std::cout << "display_timing = " << display_timing_ << "\n";
00142
00143
00144 it_ = new image_transport::ImageTransport(node_handle_);
00145 people_segmentation_image_sub_.subscribe(*it_, "people_segmentation_image", 1);
00146 face_position_subscriber_.subscribe(node_handle_, "face_position_array_in", 1);
00147
00148
00149 sensor_msgs::Image::ConstPtr nullPtr;
00150 if (use_people_segmentation_ == true)
00151 {
00152 sync_input_2_ = new message_filters::Synchronizer<message_filters::sync_policies::ApproximateTime<cob_perception_msgs::DetectionArray, sensor_msgs::Image> >(2);
00153 sync_input_2_->connectInput(face_position_subscriber_, people_segmentation_image_sub_);
00154 sync_input_2_->registerCallback(boost::bind(&DetectionTrackerNode::inputCallback, this, _1, _2));
00155 }
00156 else
00157 {
00158 face_position_subscriber_.registerCallback(boost::bind(&DetectionTrackerNode::inputCallback, this, _1, nullPtr));
00159 }
00160
00161
00162 face_position_publisher_ = node_handle_.advertise<cob_perception_msgs::DetectionArray>("face_position_array", 1);
00163
00164 std::cout << "DetectionTrackerNode initialized." << std::endl;
00165 }
00166
00167 DetectionTrackerNode::~DetectionTrackerNode()
00168 {
00169 if (it_ != 0)
00170 delete it_;
00171 if (sync_input_2_ != 0)
00172 delete sync_input_2_;
00173 }
00174
00176 unsigned long DetectionTrackerNode::convertColorImageMessageToMat(const sensor_msgs::Image::ConstPtr& image_msg, cv_bridge::CvImageConstPtr& image_ptr, cv::Mat& image)
00177 {
00178 try
00179 {
00180 image_ptr = cv_bridge::toCvShare(image_msg, sensor_msgs::image_encodings::BGR8);
00181 } catch (cv_bridge::Exception& e)
00182 {
00183 ROS_ERROR("PeopleDetection: cv_bridge exception: %s", e.what());
00184 return ipa_Utils::RET_FAILED;
00185 }
00186 image = image_ptr->image;
00187
00188 return ipa_Utils::RET_OK;
00189 }
00190
00197 unsigned long DetectionTrackerNode::copyDetection(const cob_perception_msgs::Detection& src, cob_perception_msgs::Detection& dest, bool update,
00198 unsigned int updateIndex)
00199 {
00200
00201 dest.mask.roi.x = src.mask.roi.x;
00202 dest.mask.roi.y = src.mask.roi.y;
00203 dest.mask.roi.width = src.mask.roi.width;
00204 dest.mask.roi.height = src.mask.roi.height;
00205
00206
00207
00208
00209
00210 dest.pose.pose = src.pose.pose;
00211
00212
00213 if (update == true)
00214 {
00215
00216
00217
00218 if (face_identification_votes_[updateIndex].find(src.label) == face_identification_votes_[updateIndex].end())
00219 face_identification_votes_[updateIndex][src.label] = 1.0;
00220 else
00221 face_identification_votes_[updateIndex][src.label] += 1.0;
00222
00223
00224
00225 double max_score = 0;
00226 dest.label = src.label;
00227 for (std::map<std::string, double>::iterator face_identification_votes_it = face_identification_votes_[updateIndex].begin(); face_identification_votes_it
00228 != face_identification_votes_[updateIndex].end(); face_identification_votes_it++)
00229 {
00230
00231 face_identification_votes_it->second *= face_identification_score_decay_rate_;
00232 std::string label = face_identification_votes_it->first;
00233 if (face_identification_votes_it->second > max_score && (fall_back_to_unknown_identification_ == true || (label != "Unknown" && fall_back_to_unknown_identification_
00234 == false)) && label != "UnknownHead" )
00235 {
00236 max_score = face_identification_votes_it->second;
00237 dest.label = label;
00238 }
00239 }
00240
00241
00242 if (face_identification_votes_[updateIndex][dest.label] > face_identification_votes_[updateIndex]["UnknownHead"])
00243 face_identification_votes_[updateIndex]["UnknownHead"] = face_identification_votes_[updateIndex][dest.label];
00244 if (fall_back_to_unknown_identification_ == false)
00245 {
00246 if (face_identification_votes_[updateIndex]["Unknown"] > face_identification_votes_[updateIndex]["UnknownHead"])
00247 face_identification_votes_[updateIndex]["UnknownHead"] = face_identification_votes_[updateIndex]["Unknown"];
00248 }
00249 }
00250 else
00251 dest.label = src.label;
00252
00253 if (dest.label == "UnknownHead")
00254 dest.label = "Unknown";
00255
00256
00257
00258
00259
00260
00261
00262 dest.detector = src.detector;
00263
00264 dest.header = src.header;
00265
00266 return ipa_Utils::RET_OK;
00267 }
00268
00269 inline double abs(double x)
00270 {
00271 return ((x < 0) ? -x : x);
00272 }
00273
00277 double DetectionTrackerNode::computeFacePositionDistanceTrackingRange(const cob_perception_msgs::Detection& previous_detection,
00278 const cob_perception_msgs::Detection& current_detection)
00279 {
00280 const geometry_msgs::Point* point_1 = &(previous_detection.pose.pose.position);
00281 const geometry_msgs::Point* point_2 = &(current_detection.pose.pose.position);
00282
00283 double dx = abs(point_1->x - point_2->x);
00284 double dy = abs(point_1->y - point_2->y);
00285 double dz = abs(point_1->z - point_2->z);
00286
00287
00288 if (dx > tracking_range_m_ || dy > tracking_range_m_ || dz > tracking_range_m_)
00289 {
00290
00291 return DBL_MAX;
00292 }
00293
00294
00295 double dist = dx * dx + dy * dy + dz * dz;
00296 return dist;
00297 }
00298
00299 double DetectionTrackerNode::computeFacePositionDistance(const cob_perception_msgs::Detection& previous_detection,
00300 const cob_perception_msgs::Detection& current_detection)
00301 {
00302 const geometry_msgs::Point* point_1 = &(previous_detection.pose.pose.position);
00303 const geometry_msgs::Point* point_2 = &(current_detection.pose.pose.position);
00304
00305 double dx = point_1->x - point_2->x;
00306 double dy = point_1->y - point_2->y;
00307 double dz = point_1->z - point_2->z;
00308
00309 return sqrt(dx * dx + dy * dy + dz * dz);
00310 }
00311
00314 unsigned long DetectionTrackerNode::removeMultipleInstancesOfLabel()
00315 {
00316
00317 for (int i = 0; i < (int)face_position_accumulator_.size(); i++)
00318 {
00319
00320 std::string label = face_position_accumulator_[i].label;
00321
00322
00323 if (label != "Unknown" && label != "No face")
00324 {
00325 for (int j = 0; j < (int)face_position_accumulator_.size(); j++)
00326 {
00327 if (j == i)
00328 continue;
00329
00330 if (face_position_accumulator_[j].label == label)
00331 {
00332 if (debug_)
00333 std::cout << "face_identification_votes_[i][" << label << "] = " << face_identification_votes_[i][label] << " face_identification_votes_[j][" << label
00334 << "] = " << face_identification_votes_[j][label] << "\n";
00335
00336
00337 if (face_identification_votes_[i][label] < face_identification_votes_[j][label])
00338 {
00339 face_position_accumulator_[i].label = "Unknown";
00340
00341 if (face_identification_votes_[i][label] > face_identification_votes_[i]["Unknown"])
00342 face_identification_votes_[i]["Unknown"] = face_identification_votes_[i][label];
00343 }
00344 }
00345 }
00346 }
00347 }
00348
00349 return ipa_Utils::RET_OK;
00350 }
00351
00352 unsigned long DetectionTrackerNode::prepareFacePositionMessage(cob_perception_msgs::DetectionArray& face_position_msg_out, ros::Time image_recording_time, std::string frame_id)
00353 {
00354
00355 std::vector < cob_perception_msgs::Detection > faces_to_publish;
00356 for (int i = 0; i < (int)face_position_accumulator_.size(); i++)
00357 {
00358 if (debug_)
00359 std::cout << "'UnknownHead' score: " << face_identification_votes_[i]["UnknownHead"] << " label '" << face_position_accumulator_[i].label << "' score: "
00360 << face_identification_votes_[i][face_position_accumulator_[i].label] << " - ";
00361 if ((face_identification_votes_[i][face_position_accumulator_[i].label] > min_face_identification_score_to_publish_ || face_identification_votes_[i]["UnknownHead"]
00362 > min_face_identification_score_to_publish_) && ((image_recording_time - face_position_accumulator_[i].header.stamp)
00363 < publish_currently_not_visible_detections_timespan_))
00364 {
00365 faces_to_publish.push_back(face_position_accumulator_[i]);
00366 if (debug_)
00367 std::cout << "published\n";
00368 }
00369 else if (debug_)
00370 std::cout << "not published\n";
00371 }
00372 face_position_msg_out.detections = faces_to_publish;
00373
00374
00375
00376
00377
00378
00379 face_position_msg_out.header.stamp = image_recording_time;
00380 face_position_msg_out.header.frame_id = frame_id;
00381 return ipa_Utils::RET_OK;
00382 }
00383
00385 void DetectionTrackerNode::inputCallback(const cob_perception_msgs::DetectionArray::ConstPtr& face_position_msg_in,
00386 const sensor_msgs::Image::ConstPtr& people_segmentation_image_msg)
00387 {
00388
00389
00390
00391
00392
00393
00394
00395 cv_bridge::CvImageConstPtr people_segmentation_image_ptr;
00396 cv::Mat people_segmentation_image;
00397 if (use_people_segmentation_ == true)
00398 convertColorImageMessageToMat(people_segmentation_image_msg, people_segmentation_image_ptr, people_segmentation_image);
00399
00400 if (debug_)
00401 std::cout << "incoming detections: " << face_position_msg_in->detections.size() << "\n";
00402
00403
00404 ros::Duration timeSpan(face_redetection_time_);
00405
00406 if (!rosbag_mode_)
00407 {
00408 for (int i = (int)face_position_accumulator_.size() - 1; i >= 0; i--)
00409 {
00410 if ((ros::Time::now() - face_position_accumulator_[i].header.stamp) > timeSpan)
00411 {
00412 face_position_accumulator_.erase(face_position_accumulator_.begin() + i);
00413 face_identification_votes_.erase(face_identification_votes_.begin() + i);
00414 }
00415 }
00416 if (debug_)
00417 std::cout << "Old face positions deleted.\n";
00418 }
00419
00420
00421 std::vector<int> face_detection_indices;
00422 if (use_people_segmentation_ == true)
00423 {
00424 for (int i = 0; i < (int)face_position_msg_in->detections.size(); i++)
00425 {
00426 const cob_perception_msgs::Detection& det_in = face_position_msg_in->detections[i];
00427 cv::Rect face;
00428 face.x = det_in.mask.roi.x;
00429 face.y = det_in.mask.roi.y;
00430 face.width = det_in.mask.roi.width;
00431 face.height = det_in.mask.roi.height;
00432
00433
00434 int numberBlackPixels = 0;
00435 for (int v = face.y; v < face.y + face.height; v++)
00436 {
00437 uchar* data = people_segmentation_image.ptr(v);
00438 for (int u = face.x; u < face.x + face.width; u++)
00439 {
00440 int index = 3 * u;
00441 if (data[index] == 0 && data[index + 1] == 0 && data[index + 2] == 0)
00442 numberBlackPixels++;
00443 }
00444 }
00445 int faceArea = face.height * face.width;
00446 double segmentedPeopleRatio = (double)(faceArea - numberBlackPixels) / (double)faceArea;
00447
00448
00449 if ((det_in.detector == "face" && segmentedPeopleRatio < min_segmented_people_ratio_face_) || (det_in.detector == "head" && segmentedPeopleRatio
00450 < min_segmented_people_ratio_head_))
00451 {
00452
00453 if (debug_)
00454 std::cout << "False detection\n";
00455 }
00456 else
00457 {
00458
00459 face_detection_indices.push_back(i);
00460 }
00461 }
00462 if (debug_)
00463 std::cout << "Verification with people segmentation done.\n";
00464 }
00465 else
00466 {
00467 for (unsigned int i = 0; i < face_position_msg_in->detections.size(); i++)
00468 face_detection_indices.push_back(i);
00469 }
00470
00471
00472 std::vector<bool> current_detection_has_matching(face_detection_indices.size(), false);
00473
00474 if (false)
00475 {
00476
00477 std::map<int, std::map<int, double> > distance_matrix;
00478 std::map<int, std::map<int, double> >::iterator distance_matrix_it;
00479 for (unsigned int previous_det = 0; previous_det < face_position_accumulator_.size(); previous_det++)
00480 {
00481 std::map<int, double> distance_row;
00482 for (unsigned int i = 0; i < face_detection_indices.size(); i++)
00483 distance_row[face_detection_indices[i]] = computeFacePositionDistanceTrackingRange(face_position_accumulator_[previous_det],
00484 face_position_msg_in->detections[face_detection_indices[i]]);
00485 distance_matrix[previous_det] = distance_row;
00486 }
00487 if (debug_)
00488 std::cout << "Distance matrix.\n";
00489
00490
00491 unsigned int number_matchings = (face_position_accumulator_.size() < face_detection_indices.size()) ? face_position_accumulator_.size() : face_detection_indices.size();
00492 for (unsigned int m = 0; m < number_matchings; m++)
00493 {
00494
00495 double min_dist = DBL_MAX;
00496 int previous_min_index, current_min_index;
00497 for (distance_matrix_it = distance_matrix.begin(); distance_matrix_it != distance_matrix.end(); distance_matrix_it++)
00498 {
00499 for (std::map<int, double>::iterator distance_row_it = distance_matrix_it->second.begin(); distance_row_it != distance_matrix_it->second.end(); distance_row_it++)
00500 {
00501 if (distance_row_it->second < min_dist)
00502 {
00503 min_dist = distance_row_it->second;
00504 previous_min_index = distance_matrix_it->first;
00505 current_min_index = distance_row_it->first;
00506 }
00507 }
00508 }
00509
00510
00511
00512
00513
00514 if (min_dist == DBL_MAX)
00515 break;
00516
00517
00518 copyDetection(face_position_msg_in->detections[current_min_index], face_position_accumulator_[previous_min_index], true, previous_min_index);
00519
00520 for (unsigned int i = 0; i < face_detection_indices.size(); i++)
00521 if (face_detection_indices[i] == current_min_index)
00522 current_detection_has_matching[i] = true;
00523
00524
00525 distance_matrix.erase(previous_min_index);
00526 for (distance_matrix_it = distance_matrix.begin(); distance_matrix_it != distance_matrix.end(); distance_matrix_it++)
00527 distance_matrix_it->second.erase(current_min_index);
00528 }
00529 }
00530
00531 else
00532 {
00533
00534 std::vector < std::vector<int> > costs_matrix(face_position_accumulator_.size(), std::vector<int>(face_detection_indices.size(), 0));
00535 if (debug_)
00536 std::cout << "Costs matrix.\n";
00537 for (unsigned int previous_det = 0; previous_det < face_position_accumulator_.size(); previous_det++)
00538 {
00539 for (unsigned int i = 0; i < face_detection_indices.size(); i++)
00540 {
00541 costs_matrix[previous_det][i] = 100 * computeFacePositionDistance(face_position_accumulator_[previous_det],
00542 face_position_msg_in->detections[face_detection_indices[i]]) + 100 * tracking_range_m_
00543 * (face_position_msg_in->detections[face_detection_indices[i]].label.compare(face_position_accumulator_[previous_det].label) == 0 ? 0 : 1);
00544 if (debug_)
00545 std::cout << costs_matrix[previous_det][i] << "\t";
00546 }
00547 if (debug_)
00548 std::cout << std::endl;
00549 }
00550
00551 if (face_position_accumulator_.size() != 0 && face_detection_indices.size() != 0)
00552 {
00553
00554 munkres assignmentProblem;
00555 assignmentProblem.set_diag(false);
00556 assignmentProblem.load_weights(costs_matrix);
00557 int num_rows = std::min(costs_matrix.size(), costs_matrix[0].size());
00558
00559 ordered_pair *optimalAssignment = new ordered_pair[num_rows];
00560 assignmentProblem.assign(optimalAssignment);
00561 if (debug_)
00562 std::cout << "Assignment problem solved.\n";
00563
00564
00565 for (int i = 0; i < num_rows; i++)
00566 {
00567 int current_match_index = 0, previous_match_index = 0;
00568 if (face_position_accumulator_.size() < face_detection_indices.size())
00569 {
00570
00571 previous_match_index = optimalAssignment[i].row;
00572 current_match_index = optimalAssignment[i].col;
00573 }
00574 else
00575 {
00576
00577 previous_match_index = optimalAssignment[i].col;
00578 current_match_index = optimalAssignment[i].row;
00579 }
00580
00581
00582 copyDetection(face_position_msg_in->detections[current_match_index], face_position_accumulator_[previous_match_index], true, previous_match_index);
00583
00584 for (unsigned int i = 0; i < face_detection_indices.size(); i++)
00585 if (face_detection_indices[i] == current_match_index)
00586 current_detection_has_matching[i] = true;
00587 }
00588
00589 delete optimalAssignment;
00590 }
00591 }
00592 if (debug_)
00593 std::cout << "Matches found.\n";
00594
00595
00596 for (unsigned int i = 0; i < face_detection_indices.size(); i++)
00597 {
00598 if (current_detection_has_matching[i] == false)
00599 {
00600 const cob_perception_msgs::Detection& det_in = face_position_msg_in->detections[face_detection_indices[i]];
00601 if (det_in.detector == "face")
00602 {
00603
00604 if (debug_)
00605 std::cout << "\n***** New detection *****\n\n";
00606 cob_perception_msgs::Detection det_out;
00607 copyDetection(det_in, det_out, false);
00608 det_out.pose.header = face_position_msg_in->header;
00609 face_position_accumulator_.push_back(det_out);
00610
00611 std::map<std::string, double> new_identification_data;
00612
00613 new_identification_data["UnknownHead"] = 0.0;
00614 new_identification_data[det_in.label] = 1.0;
00615 face_identification_votes_.push_back(new_identification_data);
00616 }
00617 }
00618 }
00619 if (debug_)
00620 std::cout << "New detections.\n";
00621
00622
00623 removeMultipleInstancesOfLabel();
00624
00625
00626 ros::Time image_recording_time = (face_position_msg_in->detections.size() > 0 ? face_position_msg_in->detections[0].header.stamp : ros::Time(0));
00627 std::string frame_id = (face_position_msg_in->detections.size() > 0 ? face_position_msg_in->detections[0].header.frame_id : "");
00628 cob_perception_msgs::DetectionArray face_position_msg_out;
00629 prepareFacePositionMessage(face_position_msg_out, image_recording_time, frame_id);
00630 face_position_msg_out.header = face_position_msg_in->header;
00631 face_position_publisher_.publish(face_position_msg_out);
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671 if (display_timing_ == true)
00672 ROS_INFO("%d DetectionTracker: Time stamp of pointcloud message: %f. Delay: %f.", face_position_msg_in->header.seq, face_position_msg_in->header.stamp.toSec(), ros::Time::now().toSec()-face_position_msg_in->header.stamp.toSec());
00673
00674 }
00675
00676
00677
00678 int main(int argc, char** argv)
00679 {
00680
00681 ros::init(argc, argv, "detection_tracker");
00682
00683
00684 ros::NodeHandle nh("~");
00685
00686
00687 DetectionTrackerNode detection_tracker_node(nh);
00688
00689
00690
00691
00692
00693
00694 ros::spin();
00695
00696 return 0;
00697 }