re_kinect_object_detector: recognitionmodel.cpp Source File

Go to the documentation of this file.
00001 
00031 #include "recognitionmodel.h"
00032 
00033 #include <fstream>
00034 #include <cstdio>
00035 
00036 #include <ros/ros.h>
00037 #include <sensor_msgs/Image.h>
00038 #include <cv_bridge/cv_bridge.h>
00039 #include <pcl/io/pcd_io.h>
00040 #include <boost/version.hpp>
00041 #include <boost/filesystem.hpp>
00042 #include <boost/filesystem/path.hpp>
00043 #include <boost/lexical_cast.hpp>
00044 #include <tinyxml.h>
00045 
00046 #include <opencv2/nonfree/features2d.hpp>
00047 #include <pcl/registration/registration.h>
00048 #include <pcl/registration/transformation_estimation_svd.h>
00049 
00050 namespace Utils3D {
00052     double distPoints(const cv::Point3f& p1, const cv::Point3f& p2)
00053     {
00054         cv::Point3f d = p1 - p2;
00055         return sqrt(d.x*d.x + d.y*d.y + d.z*d.z );
00056     }
00057 }
00058 
00059 ObjectAspect::ObjectAspect() {
00060     keypoints3D = PointCloud::Ptr(new PointCloud);
00061     points = PointCloud::Ptr(new PointCloud);
00062 }
00063 
00064 int ObjectAspect::calculate(const cv::Mat &image, const PointCloud::Ptr &pointcloud) {
00065 
00066     cv::Mat gray,timg;
00067 
00068     image.copyTo(this->image);
00069     this->points = pointcloud;
00070 
00071 #if 1
00072     cv::cvtColor(image,gray,CV_BGR2HLS);
00073     //gray = planes[2]-planes[1];
00074     std::vector<cv::Mat> planes;
00075     cv::split(gray,planes);
00076 
00077     gray = planes[1];
00078     cv::merge(&planes[0],(size_t)3,timg);
00079     cv::cvtColor(timg,timg,CV_HSV2BGR);
00080 #else
00081     timg = image;
00082 #endif
00083 
00084 #ifdef DEBUG_VIS
00085     cv::cvtColor(gray,timg,CV_GRAY2BGR);
00086 #endif
00087 
00088     const int HessianThreshold = 500;
00089     cv::SURF bug(HessianThreshold);
00090     bug.extended = false;
00091 
00092     bug(gray,255*cv::Mat::ones(gray.rows,gray.cols,CV_8U),keypoints,descriptors);
00093 
00094     for(size_t i = 0; i < keypoints.size(); i++) {
00095         cv::KeyPoint kp = keypoints[i];
00096         PointType pt3d = pointcloud->at(kp.pt.x,kp.pt.y);
00097         pt3d.rgb = kp.response;
00098         if(!isnan(pt3d.x)/*&&(kp.response/1000>3)*/) {
00099             keypoints3D->points.push_back(pt3d);
00100             map2D3D.insert(std::make_pair<int,int>(i,keypoints3D->points.size()-1));
00101             map2D3Dinv.insert(std::make_pair<int,int>(keypoints3D->points.size()-1,i));
00102         }
00103     }
00104 
00105 #ifdef DEBUG_VIS
00106     plotKeypoints(timg);
00107 
00108     cv::namedWindow("surf");
00109     cv::imshow("surf",timg);
00110 #endif
00111 
00112     return 0;
00113 }
00114 
00115 double ObjectAspect::matchAspects(ObjectAspect &other, Eigen::Matrix4f &transform) {
00116     std::vector<int> o_pcl2, t_pcl2;
00117     std::vector<cv::Point2i> correspondences;
00118     findCorrespondences(other, correspondences);
00119 
00120 #ifdef DEBUG_VIS
00121     cv::Mat3b image_stack(std::max(this->image.size().height,other.image.size().height), this->image.size().width+other.image.size().width );
00122 
00123     cv::Mat3b img1 = image_stack(cv::Rect(0,0,this->image.size().width,this->image.size().height));
00124     cv::Mat3b img2 = image_stack(cv::Rect(this->image.size().width,0,other.image.size().width,other.image.size().height));
00125 
00126     this->image.copyTo(img1);
00127     other.image.copyTo(img2);
00128 
00129     this->plotKeypoints(img1);
00130     other.plotKeypoints(img2);
00131 #endif
00132 
00133     for (size_t i = 0; i < correspondences.size(); i++) {
00134 
00135         int kpidx = correspondences[i].x;
00136         int lkpidx = correspondences[i].y;
00137 
00138         //cv::line(image, keypoints.at(kpidx ).pt,other.keypoints.at(lkpidx).pt,cv::Scalar(205,0,255),3);
00139 
00140         if ( map2D3D.find(kpidx) != map2D3D.end() && other.map2D3D.find(lkpidx) != other.map2D3D.end() ) {
00141             t_pcl2.push_back(kpidx);
00142             o_pcl2.push_back(lkpidx);
00143 #ifdef DEBUG_VIS
00144             cv::line(image_stack,this->keypoints[kpidx].pt,other.keypoints[lkpidx].pt+cv::Point2f(this->image.size().width,0),cv::Scalar(255,0,0));
00145 #endif
00146         }
00147     }
00148 
00149 //    ROS_INFO("found %d correspondences.",correspondences.size());
00150 //    ROS_INFO("found %d 3D correspondences.",t_pcl2.size());
00151 
00152     // distances that differ by <= EPSILON are considered equal
00153     const float EPSILON = 0.001;
00154     // distance threshold between 2 keypoints to be considered equal
00155     const float DISTANCE_THRESHOLD = 0.02;
00156 
00157     PointCloud o_pcl, t_pcl;
00158     if (t_pcl2.size()>=3) {
00159         for (size_t i = 0; i < t_pcl2.size()-2; i ++ ){
00160             PointType npt1 = this->keypoints3D->points[this->map2D3D[t_pcl2.at(i)]];
00161             PointType lpt1 = other.keypoints3D->points[other.map2D3D[o_pcl2.at(i)]];
00162             cv::Point3f np1 = cv::Point3f(npt1.x,npt1.y,npt1.z);
00163             cv::Point3f lp1 = cv::Point3f(lpt1.x,lpt1.y,lpt1.z);
00164             for (size_t j = i+1; j < t_pcl2.size()-1; j++) {
00165                 PointType npt2 = this->keypoints3D->points[this->map2D3D[t_pcl2.at(j)]];
00166                 PointType lpt2 = other.keypoints3D->points[other.map2D3D[o_pcl2.at(j)]];
00167                 cv::Point3f np2 = cv::Point3f(npt2.x,npt2.y,npt2.z);
00168                 cv::Point3f lp2 = cv::Point3f(lpt2.x,lpt2.y,lpt2.z);
00169                 if ( Utils3D::distPoints(np1,np2) < DISTANCE_THRESHOLD || fabs(Utils3D::distPoints(np1,np2) - Utils3D::distPoints(lp1,lp2)) > EPSILON )
00170                     continue;
00171                 for (size_t k = j+1; k < t_pcl2.size(); k++) {
00172                     PointType npt3 = this->keypoints3D->points[this->map2D3D[t_pcl2.at(k)]];
00173                     PointType lpt3 = other.keypoints3D->points[other.map2D3D[o_pcl2.at(k)]];
00174                     cv::Point3f np3 = cv::Point3f(npt3.x,npt3.y,npt3.z);
00175                     cv::Point3f lp3 = cv::Point3f(lpt3.x,lpt3.y,lpt3.z);
00176                     if ( Utils3D::distPoints(np1,np3) < DISTANCE_THRESHOLD ||  fabs(Utils3D::distPoints(np1,np3) - Utils3D::distPoints(lp1,lp3)) > EPSILON )
00177                         continue;
00178 
00179                     if (Utils3D::distPoints(np3,np2) < DISTANCE_THRESHOLD ||  fabs(Utils3D::distPoints(np3,np2) - Utils3D::distPoints(lp3,lp2)) > EPSILON )
00180                         continue;
00181 
00182                     cv::Point3f v1 = np3-np2;
00183                     cv::Point3f v2 = np1-np2;
00184                     cv::Point3f v3 = np3-np1;
00185 
00186                     double angle = acos(v1.ddot(v2)/sqrt(v1.ddot(v1))/sqrt(v2.ddot(v2)))/CV_PI*180.;
00187                     v1 = -1*v1;
00188                     double angle2 = acos(v1.ddot(v3)/sqrt(v1.ddot(v1))/sqrt(v3.ddot(v3)))/CV_PI*180.;
00189 
00190 //                    cerr << "angles: " << angle << ", " << angle2 << endl;
00191 
00192                     // check whether the 3 vectors are linearly independent
00193                     const double ANGLE_THRESH = 30;
00194                     if ((fabs(angle)<ANGLE_THRESH) || (fabs(angle2)<ANGLE_THRESH) ||(fabs(angle)>180-ANGLE_THRESH)||(fabs(angle2)>180-ANGLE_THRESH))
00195                         continue;
00196 
00197 //                    std::cerr << "distances: " << Utils3D::distPoints(np1,np3)
00198 //                         << "\t" << Utils3D::distPoints(np3,np2)
00199 //                         << "\t" << Utils3D::distPoints(np1,np2) << " i,j,k: " << i << ","<< j << "," << k <<  std::endl;
00200 #ifdef DEBUG_VIS
00201                     cv::circle(image_stack, this->keypoints[t_pcl2.at(i)].pt, 3, cv::Scalar(255,255,0),-1);
00202                     cv::circle(image_stack, this->keypoints[t_pcl2.at(j)].pt, 3, cv::Scalar(255,255,0),-1);
00203                     cv::circle(image_stack, this->keypoints[t_pcl2.at(k)].pt, 3, cv::Scalar(255,255,0),-1);
00204 
00205                     cv::circle(image_stack, other.keypoints[o_pcl2.at(i)].pt+cv::Point2f(this->image.size().width,0), 3, cv::Scalar(255,255,0),-1);
00206                     cv::circle(image_stack, other.keypoints[o_pcl2.at(j)].pt+cv::Point2f(this->image.size().width,0), 3, cv::Scalar(255,255,0),-1);
00207                     cv::circle(image_stack, other.keypoints[o_pcl2.at(k)].pt+cv::Point2f(this->image.size().width,0), 3, cv::Scalar(255,255,0),-1);
00208 
00209                     cv::line(image_stack,this->keypoints[t_pcl2.at(i)].pt,other.keypoints[o_pcl2.at(i)].pt+cv::Point2f(this->image.size().width,0),cv::Scalar(0,255,255),2);
00210                     cv::line(image_stack,this->keypoints[t_pcl2.at(j)].pt,other.keypoints[o_pcl2.at(j)].pt+cv::Point2f(this->image.size().width,0),cv::Scalar(0,255,255),2);
00211                     cv::line(image_stack,this->keypoints[t_pcl2.at(k)].pt,other.keypoints[o_pcl2.at(k)].pt+cv::Point2f(this->image.size().width,0),cv::Scalar(0,255,255),2);
00212 #endif
00213                     o_pcl.push_back(lpt1);
00214                     o_pcl.push_back(lpt2);
00215                     o_pcl.push_back(lpt3);
00216 
00217                     t_pcl.push_back(npt1);
00218                     t_pcl.push_back(npt2);
00219                     t_pcl.push_back(npt3);
00220                     break;
00221                 }
00222                 if (t_pcl.size() != 0)
00223                     break;
00224             }
00225             if (t_pcl.size() != 0)
00226                 break;
00227         }
00228     }
00229 
00230 
00231     if(t_pcl.points.size()==3) {
00232         // get transformation
00233         Eigen::Matrix4f t;
00234         pcl::registration::TransformationEstimationSVD<PointType, PointType> te_svd;
00235         te_svd.estimateRigidTransformation(t_pcl, o_pcl, t);
00236 
00237 #ifdef DEBUG_VIS
00238     cv::namedWindow("correspondences");
00239     cv::imshow("correspondences",image_stack);
00240 #endif
00241         transform = t;
00242         return 1;
00243 
00244     }
00245     return DBL_MAX;
00246 }
00247 
00248 int ObjectAspect::naiveNearestNeighbor(const float* vec,
00249                                        const std::vector<cv::KeyPoint>& model_keypoints,
00250                                        const float* model_descriptors, const std::map<int,int>& imageMap2D3D) {
00251     // length of descriptor
00252     const int LENGTH = 64;
00253     int neighbor = -1;
00254     double d = 0.;
00255     double dist1 = DBL_MAX, dist2 = DBL_MAX;
00256 
00257     for(size_t i = 0; i < model_keypoints.size(); i++ ) {
00258         if(imageMap2D3D.find(i)==imageMap2D3D.end())
00259             continue;
00260 
00261         const float* mvec = &model_descriptors[i*64];
00262         d = compareSURFDescriptors( vec, mvec, dist2, LENGTH );
00263         if( d < dist1 ) {
00264             dist2 = dist1;
00265             dist1 = d;
00266             neighbor = i;
00267         }
00268         else if ( d < dist2 )
00269             dist2 = d;
00270     }
00271     if ( dist1 < 0.6*dist2 )
00272         return neighbor;
00273     return -1;
00274 }
00275 
00276 double
00277 ObjectAspect::compareSURFDescriptors(const float* d1, const float* d2, double best, int length) {
00278     double total_cost = 0;
00279     assert( length % 4 == 0 );
00280     for( int i = 0; i < length; i += 4 ) {
00281         double t0 = d1[i  ] - d2[i  ];
00282         double t1 = d1[i+1] - d2[i+1];
00283         double t2 = d1[i+2] - d2[i+2];
00284         double t3 = d1[i+3] - d2[i+3];
00285         total_cost += t0*t0 + t1*t1 + t2*t2 + t3*t3;
00286         if( total_cost > best )
00287             break;
00288     }
00289 
00290     return total_cost;
00291 }
00292 
00293 void ObjectAspect::findCorrespondences(const ObjectAspect& other, std::vector<cv::Point2i>& ptpairs) {
00294     const std::vector<cv::KeyPoint>& imageKeypoints = other.keypoints;
00295     const std::vector<float>& imageDescriptors = other.descriptors;
00296     const std::map<int,int>& imageMap2D3D = other.map2D3D;
00297     ptpairs.clear();
00298 
00299     for(size_t i = 0; i < keypoints.size(); i++) {
00300         if(map2D3D.find(i)==map2D3D.end())
00301             continue;
00302 
00303         const float* mvec = &descriptors[i*64];
00304 
00305         int nearest_neighbor = naiveNearestNeighbor(mvec,
00306                                                     imageKeypoints,
00307                                                     &imageDescriptors[0] ,
00308                                                     imageMap2D3D);
00309         if (nearest_neighbor >= 0) {
00310             ptpairs.push_back(cv::Point2i(i,nearest_neighbor));
00311         }
00312     }
00313 }
00314 
00315 
00316 void ObjectAspect::plotKeypoints(cv::Mat &image) {
00317     const int radius = 1, thickness = 2;
00318     const cv::Scalar color(0,0,0,0);
00319     for(size_t i = 0;i<keypoints.size();++i) {
00320         cv::KeyPoint kp = keypoints[i];
00321         cv::circle(image,kp.pt,kp.response/1000,cv::Scalar(255,0,0,0));
00322         cv::circle(image,kp.pt,radius,color,thickness);
00323         if (map2D3D.find(i) != map2D3D.end())
00324             cv::circle(image,kp.pt,kp.response/1000,cv::Scalar(0,255,0,0));
00325     }
00326 }
00327 
00336 int read_binary_PCL(const std::string& filename, PointCloud::Ptr cloud) {
00337     std::ifstream in_file(filename.c_str());
00338     std::string token("DATA binary\n");
00339 
00340     const size_t BUFFER_SIZE = 4096;
00341     char buffer[BUFFER_SIZE];
00342     in_file.read(buffer, BUFFER_SIZE);
00343     if (in_file.gcount() < BUFFER_SIZE) {
00344         std::cerr << "could not read whole file" << std::endl;
00345         return -1;
00346     }
00347 
00348     bool null_after_header = false;
00349     size_t matched_chars = 0;
00350     size_t header_length = 0;
00351     for(size_t i=0; i<BUFFER_SIZE; i++) {
00352         if (buffer[i] == token[matched_chars]) {
00353             matched_chars++;
00354         } else
00355             matched_chars = 0;
00356 
00357         if (matched_chars == token.length()) {
00358             header_length = i;
00359             size_t num_nulls = 0;
00360             for(size_t j=header_length+1; j<BUFFER_SIZE; j++) {
00361                 if (buffer[j] == '\0')
00362                     num_nulls++;
00363                 else
00364                     break;
00365             }
00366             null_after_header = num_nulls > 10;
00367             if (null_after_header) {
00368                 std::cout << "null bytes found after header, converting from old PCD binary format" << std::endl;
00369             }
00370             break;
00371         }
00372     }
00373 
00374     std::string tmp_filename;
00375     int temp_file_fd;
00376     if (null_after_header) {
00377         char tmp_name[] = "/tmp/pcd_file_XXXXXX";
00378 
00379         temp_file_fd = mkstemp(tmp_name);
00380         if (temp_file_fd < 0) {
00381             std::cerr << "could not create temporary file" << std::endl;
00382             return -1;
00383         }
00384         tmp_filename = tmp_name;
00385 
00386         std::ofstream tmp_file(tmp_filename.c_str());
00387         tmp_file.write(buffer, header_length+1);
00388 
00389         in_file.seekg(4096, std::ios::beg);
00390         while(in_file.good()) {
00391             in_file.read(buffer, BUFFER_SIZE);
00392             tmp_file.write(buffer, in_file.gcount());
00393         }
00394         in_file.close();
00395         tmp_file.close();
00396         pcl::io::loadPCDFile(tmp_filename, *cloud);
00397         remove(tmp_filename.c_str());
00398     } else {
00399         in_file.close();
00400         // just load it
00401         pcl::io::loadPCDFile(filename, *cloud);
00402     }
00403 
00404     return 0;
00405 }
00406 
00407 void ObjectAspect::fromFile(const std::string &filename) {
00408     PointCloud::Ptr cloud  = PointCloud::Ptr(new PointCloud);
00409     read_binary_PCL(filename, cloud);
00410     sensor_msgs::ImagePtr image_(new sensor_msgs::Image);
00411     pcl::toROSMsg (*cloud, *image_);
00412     cv_bridge::CvImagePtr img_ = cv_bridge::toCvCopy(*image_, "bgr8");
00413 
00414     ROS_INFO("loading model aspect: %s",filename.c_str());
00415 
00416     calculate(img_->image, cloud);
00417     points = cloud;
00418     // TODO: Load the view point to object base transform from the pointclouds viewpoint;
00419 }
00420 
00421 RecognitionModel::RecognitionModel()
00422 {
00423 }
00424 
00426 bool loadMetaFile(const std::string& file_name, std::string& model_name, std::string& model_type, int& face_count) {
00427     TiXmlDocument doc(file_name);
00428     bool loaded = doc.LoadFile();
00429     if (!loaded)
00430         return false;
00431 
00432     TiXmlHandle hdoc(&doc);
00433     TiXmlHandle model_handle = hdoc.FirstChildElement("model");
00434     TiXmlElement *model_element = model_handle.Element();
00435     if (model_element == NULL)
00436         return false;
00437     TiXmlElement *name_element = model_handle.FirstChildElement("name").Element();
00438     if (name_element == NULL)
00439         return false;
00440     model_name = name_element->GetText();
00441     TiXmlElement *type_element = model_handle.FirstChildElement("type").Element();
00442     if (type_element == NULL)
00443         return false;
00444     model_type = type_element->GetText();
00445     TiXmlElement *faces_element = model_handle.FirstChildElement("faces").Element();
00446     if (faces_element == NULL)
00447         return false;
00448     std::string faces_str = faces_element->GetText();
00449     face_count = boost::lexical_cast<int>(faces_str);
00450 
00451     return true;
00452 }
00453 
00454 
00456 const char *boost_path_to_cstr(const boost::filesystem::path& path) {
00457 #if BOOST_VERSION >= 104600
00458     return path.c_str();
00459 #else
00460     return path.native_file_string().c_str();
00461 #endif
00462 }
00463 
00464 bool RecognitionModel::loadFromPath(const std::string &path) {
00465     boost::filesystem::path dir(path);
00466 
00467     if(!boost::filesystem::is_directory(dir)) {
00468          ROS_ERROR("Not a directory: %s", boost_path_to_cstr(dir));
00469          return false;
00470     }
00471 
00472     boost::filesystem::path meta_path(dir / "meta.xml");
00473     if (!boost::filesystem::exists(meta_path)) {
00474         ROS_ERROR("No meta.xml file found in %s", boost_path_to_cstr(dir));
00475         return false;
00476     }
00477 
00478     int face_count = 0;
00479     std::string model_type = "";
00480     bool loaded = loadMetaFile(boost_path_to_cstr(meta_path), model_name, model_type, face_count);
00481 
00482     if (!loaded || model_name.empty() || (face_count == 0) || (model_type != "kinect_pcl")) {
00483         ROS_ERROR("invalid meta.xml in %s", boost_path_to_cstr(dir));
00484         return false;
00485     }
00486 
00487     for(boost::filesystem::directory_iterator it(dir); it!=boost::filesystem::directory_iterator(); ++it)
00488     {
00489         boost::filesystem::path pref = *it;
00490         const static std::string FACE_PREFIX = "dense_face_";
00491 
00492 #if BOOST_VERSION >= 104600
00493         if ((pref.filename().string().substr(0, FACE_PREFIX.length()) == FACE_PREFIX) && (pref.extension() == ".pcd")) {
00494 #else
00495         if ((pref.filename().substr(0, FACE_PREFIX.length()) == FACE_PREFIX) && (pref.extension() == ".pcd")) {
00496 #endif
00497             aspects.push_back(new ObjectAspect());
00498             ObjectAspect* aspect = aspects.back();
00499             aspect->fromFile(pref.string());
00500             if (aspects.size() > static_cast<size_t>(face_count)) {
00501                 ROS_INFO("found more faces than specified in meta.xml. please check %s", boost_path_to_cstr(meta_path));
00502                 break;
00503             }
00504         }
00505     }
00506 
00507     if (aspects.size() < static_cast<size_t>(face_count)) {
00508         ROS_ERROR("meta.xml specified %i faces, but only %i were found.", face_count, aspects.size());
00509         return false;
00510     }
00511     if (aspects.empty()) {
00512         ROS_ERROR("no model files in path %s", path.c_str());
00513         return false;
00514     }
00515     return true;
00516 }
00517 
00518 
00519 bool RecognitionModel::matchAspects(ObjectAspect &other, Eigen::Matrix4f &model2scene) {
00520     double minerror = DBL_MAX;
00521 
00522     for (size_t i = 0; i < aspects.size(); i ++) {
00523         ObjectAspect* model_aspect = aspects[i];
00524         Eigen::Matrix4f trafo;
00525         double error = model_aspect->matchAspects(other,trafo);
00526         if (error < minerror) {
00527             model2scene = trafo;
00528             other.match = model_aspect;
00529             minerror = error;
00530             ROS_INFO("HIT %d, minerror: %f",(int)i, minerror);
00531         }
00532     }
00533 
00534     return minerror != DBL_MAX;
00535 }