cob_people_detection: cob_people_detection: faces.cpp Source File

00001 /*********************************************************************
00002  * Faces-specific computer vision algorithms.
00003  *
00004  **********************************************************************
00005  *
00006  * Software License Agreement (BSD License)
00007  * 
00008  *  Copyright (c) 2008, Willow Garage, Inc.
00009  *  All rights reserved.
00010  * 
00011  *  Redistribution and use in source and binary forms, with or without
00012  *  modification, are permitted provided that the following conditions
00013  *  are met:
00014  * 
00015  *   * Redistributions of source code must retain the above copyright
00016  *     notice, this list of conditions and the following disclaimer.
00017  *   * Redistributions in binary form must reproduce the above
00018  *     copyright notice, this list of conditions and the following
00019  *     disclaimer in the documentation and/or other materials provided
00020  *     with the distribution.
00021  *   * Neither the name of the Willow Garage nor the names of its
00022  *     contributors may be used to endorse or promote products derived
00023  *     from this software without specific prior written permission.
00024  * 
00025  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00026  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00027  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00028  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00029  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00030  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00031  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00032  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00033  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00034  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00035  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036  *  POSSIBILITY OF SUCH DAMAGE.
00037  *********************************************************************/
00038 
00039 /* Author: Caroline Pantofaru
00040  * Modified for Kinect/Point Cloud support: Richard Bormann */
00041 
00042 #include "cob_people_detection/faces.h"
00043 #include <cfloat>
00044 #include <cctype>
00045 
00046 namespace people {
00047 
00048 Faces::Faces():
00049   list_(NULL),
00050   cam_model_(NULL) {
00051 }
00052 
00053 Faces::~Faces() {
00054   // Kill all the threads.
00055   threads_.interrupt_all();
00056   threads_.join_all();
00057   delete face_go_mutex_;
00058   face_go_mutex_ = NULL;
00059 
00060   for (int i=list_.size(); i>0; i--) {
00061     list_.pop_back();
00062   }
00063 
00064   cam_model_ = 0;
00065 
00066 }
00067 
00068 
00069 /* Note: The multi-threading in this file is left over from a previous incarnation that allowed multiple 
00070  * cascades to be run at once. It hasn't been removed in case we want to return to that model, and since 
00071  * there isn't much overhead. Right now, however, only one classifier is being run per instantiation 
00072  * of the face_detector node.
00073  */
00074 
00075 
00076 void Faces::initFaceDetection(uint num_cascades, string haar_classifier_filename, double face_size_min_m, double face_size_max_m, double max_face_z_m, double face_sep_dist_m) {
00077   images_ready_ = 0;
00078 
00079   face_size_min_m_ = face_size_min_m;
00080   face_size_max_m_ = face_size_max_m;
00081   max_face_z_m_ = max_face_z_m;
00082   face_sep_dist_m_ = face_sep_dist_m;
00083 
00084   face_go_mutex_ = new boost::mutex();
00085   
00086   bool cascade_ok = cascade_.load(haar_classifier_filename);
00087 
00088   if (!cascade_ok) {
00089     ROS_ERROR_STREAM("Cascade file " << haar_classifier_filename << " doesn't exist.");
00090     return;
00091   }
00092   threads_.create_thread(boost::bind(&Faces::faceDetectionThread,this,0));
00093 }
00094 
00096 
00097 vector<Box2D3D> Faces::detectAllFaces(cv::Mat &image, double threshold, cv::Mat &disparity_image, image_geometry::StereoCameraModel *cam_model, pcl::PointCloud<pcl::PointXYZ>* depth_cloud) {
00098 
00099   faces_.clear();
00100 
00101   // Convert the image to grayscale, if necessary.
00102   
00103   if (image.channels() == 1) {
00104     cv_image_gray_.create(image.size(), CV_8UC1);
00105     image.copyTo(cv_image_gray_);
00106   }
00107   else if (image.channels() == 3) {
00108     cv_image_gray_.create(image.size(), CV_8UC1);
00109     cv::cvtColor(image, cv_image_gray_, CV_BGR2GRAY);
00110   }
00111   else {
00112     std::cerr << "Unknown image type"<<std::endl;
00113     return faces_;
00114   }
00115   
00116   depth_cloud_ = depth_cloud;
00117   disparity_image_ = &disparity_image;
00118   cam_model_ = cam_model;
00119   
00120   // Tell the face detection threads that they can run once.
00121   num_threads_to_wait_for_ = threads_.size();
00122   boost::mutex::scoped_lock fgmlock(*(face_go_mutex_));
00123   images_ready_++;
00124   fgmlock.unlock();
00125 
00126   face_detection_ready_cond_.notify_all();
00127 
00128   boost::mutex::scoped_lock fdmlock(face_done_mutex_);
00129   while (num_threads_to_wait_for_ > 0) {
00130     face_detection_done_cond_.wait(fdmlock);
00131   }  
00132 
00133   return faces_;
00134 }
00135 
00137 
00138 void Faces::faceDetectionThread(uint i) {
00139 
00140   while (1) {
00141     boost::mutex::scoped_lock fgmlock(*(face_go_mutex_));
00142     boost::mutex::scoped_lock tlock(t_mutex_, boost::defer_lock);
00143     while (1) {
00144       tlock.lock();
00145       if (images_ready_) {
00146         --images_ready_;
00147         tlock.unlock();
00148         break;
00149       }
00150       tlock.unlock();
00151       face_detection_ready_cond_.wait(fgmlock);
00152     }
00153 
00154     // Find the faces using OpenCV's haar cascade object detector.
00155     int this_min_face_size = 3; // can only be determined exactly for stereo camera pairs, see later below
00156 
00157     std::vector<cv::Rect> faces_vec;
00158     cascade_.detectMultiScale(cv_image_gray_, faces_vec,  1.2, 2, CV_HAAR_DO_CANNY_PRUNING, cv::Size(this_min_face_size,this_min_face_size));
00159 
00160     // Filter the faces using depth information, if available. Currently checks that the actual face size is within the given limits.
00161     cv::Scalar color(0,255,0);
00162     Box2D3D one_face;
00163     double avg_disp = 0.0;
00164     double avg_depth = 0.0;
00165     cv::Mat uvd(1,3,CV_32FC1);
00166     cv::Mat xyz(1,3,CV_32FC1);
00167     // For each face...
00168     for (uint iface = 0; iface < faces_vec.size(); iface++) {//face_seq->total; iface++) {
00169 
00170       one_face.status = "good";
00171 
00172       one_face.box2d = faces_vec[iface];
00173       one_face.id = i; // The cascade that computed this face.
00174 
00175       // use the metric depth image directly, if available
00176       if (depth_cloud_ != 0)
00177       {
00178         // Get the median disparity in the middle half of the bounding box.
00179         int uStart = floor(one_face.box2d.x+0.25*one_face.box2d.width);
00180         int uEnd = floor(one_face.box2d.x+0.75*one_face.box2d.width) + 1;
00181         int vStart = floor(one_face.box2d.y+0.25*one_face.box2d.height);
00182         int vEnd = floor(one_face.box2d.y+0.75*one_face.box2d.height) + 1;
00183         int du = abs(uEnd-uStart);
00184 
00185         cv::Mat tmat(1, du*abs(vEnd-vStart), CV_32FC1);
00186 
00187         int vector_position = 0;
00188         for (int v=vStart; v<vEnd; v++)
00189           for (int u=uStart; u<uEnd; u++, vector_position++)
00190           {
00191             float depthval = (*depth_cloud_)(u,v).z;
00192             if (!isnan(depthval)) tmat.at<float>(0,vector_position) = (*depth_cloud_)(u,v).z;
00193             else tmat.at<float>(0,vector_position) = 1e20;
00194           }
00195 
00196         cv::Mat tmat_sorted;
00197         cv::sort(tmat, tmat_sorted, CV_SORT_EVERY_COLUMN+CV_SORT_DESCENDING);
00198         avg_depth = tmat_sorted.at<float>(floor(cv::countNonZero(tmat_sorted>=0.0)*0.5)); // Get the middle valid disparity (-1 disparities are invalid)
00199 
00200         // Fill in the rest of the face data structure.
00201         one_face.center2d = cv::Point2d(one_face.box2d.x+one_face.box2d.width*0.5,
00202                                         one_face.box2d.y+one_face.box2d.height*0.5);
00203         one_face.radius2d = one_face.box2d.width*0.5;
00204 
00205         // If the median disparity was valid and the face is a reasonable size, the face status is "good".
00206         // If the median disparity was valid but the face isn't a reasonable size, the face status is "bad".
00207         // Otherwise, the face status is "unknown".
00208         std::cout << "avg_depth: " << avg_depth << "\n";
00209         if (avg_depth > 0) {
00210           double a,b, radiusX, radiusY;
00211           // vertical line regularly lies completely on the head whereas this does not hold very often for the horizontal line crossing the bounding box of the face
00212           // rectangle in the middle
00213           a = (*depth_cloud_)((int)(one_face.box2d.x+0.5*one_face.box2d.width), one_face.box2d.y).y;
00214           b = (*depth_cloud_)((int)(one_face.box2d.x+0.5*one_face.box2d.width), one_face.box2d.y+one_face.box2d.height).y;
00215           std::cout << a << "  " << b << "  y(a,b)\n";
00216           if (isnan(a) || isnan(b)) radiusY = 0.0;
00217           else radiusY = fabs(b-a)*0.5;
00218           one_face.radius3d = radiusY;
00219 
00220           // for radius estimation with the horizontal line through the face rectangle use points which typically still lie on the face and not in the background
00221           a = (*depth_cloud_)((int)(one_face.box2d.x+one_face.box2d.width*0.25), (int)(one_face.box2d.y+one_face.box2d.height*0.5)).x;
00222           b = (*depth_cloud_)((int)(one_face.box2d.x+one_face.box2d.width*0.75), (int)(one_face.box2d.y+one_face.box2d.height*0.5)).x;
00223           std::cout << a << "  " << b << "  x(a,b)\n";
00224           if (isnan(a) || isnan(b)) radiusX = 0.0;
00225           else {
00226             radiusX = fabs(b-a);
00227             if (radiusY != 0.0) one_face.radius3d = (radiusX+radiusY)*0.5;
00228             else one_face.radius3d = radiusX;
00229           }
00230 
00231           pcl::PointXYZ pxyz = (*depth_cloud_)(one_face.center2d.x, one_face.center2d.y);
00232           one_face.center3d = cv::Point3d(0.0,0.0,0.0);
00233           if (!isnan(pxyz.z)) one_face.center3d.z = pxyz.z;
00234 
00235           std::cout << "radiusX: " << radiusX << "  radiusY: " << radiusY << "\n";
00236           std::cout << "avg_depth: " << avg_depth << " > max_face_z_m_: " << max_face_z_m_ << " ?  2*radius3d: " << 2.0*one_face.radius3d << " < face_size_min_m_: " << face_size_min_m_ << " ?  2radius3d: " << 2.0*one_face.radius3d << " > face_size_max_m_:" << face_size_max_m_ << "?\n";
00237           if (one_face.radius3d == 0.0) one_face.status = "unknown";
00238           else if (avg_depth > max_face_z_m_ || 2.0*one_face.radius3d < face_size_min_m_ || 2.0*one_face.radius3d > face_size_max_m_) {
00239             one_face.status = "bad";
00240           }
00241         }
00242         else {
00243           one_face.radius3d = 0.0;
00244           one_face.center3d = cv::Point3d(0.0,0.0,0.0);
00245           one_face.status = "unknown";
00246         }
00247       }
00248       else
00249       {
00250         // access stereo camera data
00251         cv::Point3d p3_1(0,0,max_face_z_m_);
00252         cv::Point3d p3_2(face_size_min_m_,0,max_face_z_m_);
00253         cv::Point2d p2_1, p2_2;
00254         (cam_model_->left()).project3dToPixel(p3_1,p2_1);
00255         (cam_model_->left()).project3dToPixel(p3_2,p2_2);
00256         this_min_face_size = (int)(floor(fabs(p2_2.x-p2_1.x)));
00257         //std::cout << "this_min_face_size: " << this_min_face_size << "\n";
00258         //std::cout << (cam_model_->left()).fx() << " " << (cam_model_->left()).fy() << " " << (cam_model_->left()).Tx() << " " << (cam_model_->left()).Ty() << " " << (cam_model_->left()).cx() << " " << (cam_model_->left()).cy() << "\n";
00259 
00260         // Get the median disparity in the middle half of the bounding box.
00261         cv::Mat disp_roi_shallow(*disparity_image_,cv::Rect(floor(one_face.box2d.x+0.25*one_face.box2d.width),
00262                                                             floor(one_face.box2d.y+0.25*one_face.box2d.height),
00263                                                             floor(one_face.box2d.x+0.75*one_face.box2d.width) - floor(one_face.box2d.x+0.25*one_face.box2d.width) + 1,
00264                                                             floor(one_face.box2d.y+0.75*one_face.box2d.height) - floor(one_face.box2d.y+0.25*one_face.box2d.height) + 1));
00265         cv::Mat disp_roi = disp_roi_shallow.clone();
00266         cv::Mat tmat = disp_roi.reshape(1,disp_roi.rows*disp_roi.cols);
00267         cv::Mat tmat_sorted;
00268         cv::sort(tmat, tmat_sorted, CV_SORT_EVERY_COLUMN+CV_SORT_DESCENDING);
00269         avg_disp = tmat_sorted.at<float>(floor(cv::countNonZero(tmat_sorted>=0.0)/2.0)); // Get the middle valid disparity (-1 disparities are invalid)
00270 
00271         // Fill in the rest of the face data structure.
00272         one_face.center2d = cv::Point2d(one_face.box2d.x+one_face.box2d.width/2.0,
00273                                         one_face.box2d.y+one_face.box2d.height/2.0);
00274         one_face.radius2d = one_face.box2d.width/2.0;
00275 
00276         // If the median disparity was valid and the face is a reasonable size, the face status is "good".
00277         // If the median disparity was valid but the face isn't a reasonable size, the face status is "bad".
00278         // Otherwise, the face status is "unknown".
00279         //cv::Mat_<double> Q = cam_model_->reprojectionMatrix();
00280         //for (int j=0; j<4; j++) { for (int i=0; i<4; i++) std::cout << Q(i, j) << "\t"; std::cout << "\n"; }
00281         //std::cout << "avg_disp: " << avg_disp << "\n";
00282         if (avg_disp > 0) {
00283           cam_model_->projectDisparityTo3d(cv::Point2d(0.0,0.0),avg_disp,p3_1);
00284           cam_model_->projectDisparityTo3d(cv::Point2d(one_face.box2d.width,0.0),avg_disp,p3_2);
00285           //std::cout << "x:  " << p3_1.x << "  " << p3_2.x << "  y:  " << p3_1.y << "  " << p3_2.y << "  z:  " << p3_1.z << "  " << p3_2.z << "\n";
00286           one_face.radius3d = fabs(p3_2.x-p3_1.x)/2.0;
00287           cam_model_->projectDisparityTo3d(one_face.center2d, avg_disp, one_face.center3d);
00288           if (one_face.center3d.z > max_face_z_m_ || 2.0*one_face.radius3d < face_size_min_m_ || 2.0*one_face.radius3d > face_size_max_m_) {
00289             one_face.status = "bad";
00290           }
00291         }
00292         else {
00293           one_face.radius3d = 0.0;
00294           one_face.center3d = cv::Point3d(0.0,0.0,0.0);
00295           one_face.status = "unknown";
00296         }
00297       }
00298 
00299       // Add faces to the output vector.
00300       // lock faces
00301       boost::mutex::scoped_lock lock(face_mutex_);
00302       faces_.push_back(one_face);
00303       lock.unlock();
00304     }
00305 
00306     boost::mutex::scoped_lock fdmlock(face_done_mutex_);
00307     num_threads_to_wait_for_--;
00308     fdmlock.unlock();
00309     face_detection_done_cond_.notify_one();
00310   }
00311 }
00312 
00313 };