#include "recognitionmodel.h"

#include <fstream>
#include <cstdio>
#include <cstdlib>   // mkstemp
#include <cmath>     // std::isnan, sqrt, fabs, acos
#include <cfloat>    // DBL_MAX
#include <cassert>
#include <unistd.h>  // close

#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <cv_bridge/cv_bridge.h>
#include <pcl/io/pcd_io.h>
#include <boost/version.hpp>
#include <boost/filesystem.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/lexical_cast.hpp>
#include <tinyxml.h>

#include <opencv2/nonfree/features2d.hpp>
#include <pcl/registration/registration.h>
#include <pcl/registration/transformation_estimation_svd.h>

namespace Utils3D {
// Euclidean distance between two 3-D points.
double distPoints(const cv::Point3f& p1, const cv::Point3f& p2)
{
    cv::Point3f d = p1 - p2;
    return sqrt(d.x*d.x + d.y*d.y + d.z*d.z);
}
}

ObjectAspect::ObjectAspect() {
    keypoints3D = PointCloud::Ptr(new PointCloud);
    points = PointCloud::Ptr(new PointCloud);
}

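/**
 * Extracts SURF keypoints and descriptors from the RGB image and pairs each
 * keypoint with its 3-D point from the registered point cloud. Keypoints that
 * fall on invalid (NaN) depth readings stay in the 2-D list but get no entry
 * in keypoints3D; map2D3D / map2D3Dinv record the index mapping between the
 * two sets.
 */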
int ObjectAspect::calculate(const cv::Mat &image, const PointCloud::Ptr &pointcloud) {

    cv::Mat gray, timg;

    image.copyTo(this->image);
    this->points = pointcloud;

#if 1
    // Detect features on the lightness channel of the HLS representation
    // instead of plain grayscale.
    cv::cvtColor(image, gray, CV_BGR2HLS);

    std::vector<cv::Mat> planes;
    cv::split(gray, planes);

    gray = planes[1];
    cv::merge(&planes[0], (size_t)3, timg);
    cv::cvtColor(timg, timg, CV_HLS2BGR); // debug image only; planes come from an HLS image
#else
    timg = image;
#endif

#ifdef DEBUG_VIS
    cv::cvtColor(gray, timg, CV_GRAY2BGR);
#endif

    // SURF with 64-dimensional descriptors (extended = false).
    const int HessianThreshold = 500;
    cv::SURF surf(HessianThreshold);
    surf.extended = false;

    surf(gray, 255*cv::Mat::ones(gray.rows, gray.cols, CV_8U), keypoints, descriptors);

    // Keep only keypoints with a valid depth reading and remember the
    // 2-D <-> 3-D index mapping in both directions.
    for (size_t i = 0; i < keypoints.size(); i++) {
        cv::KeyPoint kp = keypoints[i];
        PointType pt3d = pointcloud->at(kp.pt.x, kp.pt.y);
        pt3d.rgb = kp.response;
        if (!std::isnan(pt3d.x)) {
            keypoints3D->points.push_back(pt3d);
            map2D3D.insert(std::make_pair((int)i, (int)keypoints3D->points.size() - 1));
            map2D3Dinv.insert(std::make_pair((int)keypoints3D->points.size() - 1, (int)i));
        }
    }

#ifdef DEBUG_VIS
    plotKeypoints(timg);

    cv::namedWindow("surf");
    cv::imshow("surf", timg);
#endif

    return 0;
}

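/**
 * Matches this aspect against another one. Descriptor correspondences are
 * filtered by a geometric consistency search: the code looks for a triangle of
 * three correspondences whose pairwise 3-D distances agree between the two
 * aspects (within EPSILON) and whose triangle is not degenerate. The rigid
 * transform mapping this aspect onto the other is then estimated from those
 * three point pairs via SVD. Returns 1 on success and DBL_MAX if no consistent
 * triangle was found (lower return values mean better matches).
 */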
double ObjectAspect::matchAspects(ObjectAspect &other, Eigen::Matrix4f &transform) {
    std::vector<int> o_pcl2, t_pcl2;
    std::vector<cv::Point2i> correspondences;
    findCorrespondences(other, correspondences);

#ifdef DEBUG_VIS
    cv::Mat3b image_stack(std::max(this->image.size().height, other.image.size().height), this->image.size().width + other.image.size().width);

    cv::Mat3b img1 = image_stack(cv::Rect(0, 0, this->image.size().width, this->image.size().height));
    cv::Mat3b img2 = image_stack(cv::Rect(this->image.size().width, 0, other.image.size().width, other.image.size().height));

    this->image.copyTo(img1);
    other.image.copyTo(img2);

    this->plotKeypoints(img1);
    other.plotKeypoints(img2);
#endif

    // Keep only correspondences for which both keypoints have a valid 3-D point.
    for (size_t i = 0; i < correspondences.size(); i++) {
        int kpidx  = correspondences[i].x;
        int lkpidx = correspondences[i].y;

        if (map2D3D.find(kpidx) != map2D3D.end() && other.map2D3D.find(lkpidx) != other.map2D3D.end()) {
            t_pcl2.push_back(kpidx);
            o_pcl2.push_back(lkpidx);
#ifdef DEBUG_VIS
            cv::line(image_stack, this->keypoints[kpidx].pt, other.keypoints[lkpidx].pt + cv::Point2f(this->image.size().width, 0), cv::Scalar(255, 0, 0));
#endif
        }
    }

    // Maximum allowed difference between corresponding pairwise distances.
    const float EPSILON = 0.001;
    // Minimum distance between two triangle vertices (avoids tiny triangles).
    const float DISTANCE_THRESHOLD = 0.02;

    // Search for a triangle of three correspondences whose pairwise 3-D
    // distances agree between the two aspects and which is not degenerate.
    PointCloud o_pcl, t_pcl;
    if (t_pcl2.size() >= 3) {
        for (size_t i = 0; i < t_pcl2.size()-2; i++) {
            PointType npt1 = this->keypoints3D->points[this->map2D3D[t_pcl2.at(i)]];
            PointType lpt1 = other.keypoints3D->points[other.map2D3D[o_pcl2.at(i)]];
            cv::Point3f np1 = cv::Point3f(npt1.x, npt1.y, npt1.z);
            cv::Point3f lp1 = cv::Point3f(lpt1.x, lpt1.y, lpt1.z);
            for (size_t j = i+1; j < t_pcl2.size()-1; j++) {
                PointType npt2 = this->keypoints3D->points[this->map2D3D[t_pcl2.at(j)]];
                PointType lpt2 = other.keypoints3D->points[other.map2D3D[o_pcl2.at(j)]];
                cv::Point3f np2 = cv::Point3f(npt2.x, npt2.y, npt2.z);
                cv::Point3f lp2 = cv::Point3f(lpt2.x, lpt2.y, lpt2.z);
                if (Utils3D::distPoints(np1, np2) < DISTANCE_THRESHOLD || fabs(Utils3D::distPoints(np1, np2) - Utils3D::distPoints(lp1, lp2)) > EPSILON)
                    continue;
                for (size_t k = j+1; k < t_pcl2.size(); k++) {
                    PointType npt3 = this->keypoints3D->points[this->map2D3D[t_pcl2.at(k)]];
                    PointType lpt3 = other.keypoints3D->points[other.map2D3D[o_pcl2.at(k)]];
                    cv::Point3f np3 = cv::Point3f(npt3.x, npt3.y, npt3.z);
                    cv::Point3f lp3 = cv::Point3f(lpt3.x, lpt3.y, lpt3.z);
                    if (Utils3D::distPoints(np1, np3) < DISTANCE_THRESHOLD || fabs(Utils3D::distPoints(np1, np3) - Utils3D::distPoints(lp1, lp3)) > EPSILON)
                        continue;

                    if (Utils3D::distPoints(np3, np2) < DISTANCE_THRESHOLD || fabs(Utils3D::distPoints(np3, np2) - Utils3D::distPoints(lp3, lp2)) > EPSILON)
                        continue;

                    // Reject near-degenerate triangles: the interior angles must
                    // be well away from 0 and 180 degrees.
                    cv::Point3f v1 = np3 - np2;
                    cv::Point3f v2 = np1 - np2;
                    cv::Point3f v3 = np3 - np1;

                    double angle  = acos(v1.ddot(v2)/sqrt(v1.ddot(v1))/sqrt(v2.ddot(v2)))/CV_PI*180.;
                    v1 = -1*v1;
                    double angle2 = acos(v1.ddot(v3)/sqrt(v1.ddot(v1))/sqrt(v3.ddot(v3)))/CV_PI*180.;

                    const double ANGLE_THRESH = 30;
                    if ((fabs(angle) < ANGLE_THRESH) || (fabs(angle2) < ANGLE_THRESH) || (fabs(angle) > 180-ANGLE_THRESH) || (fabs(angle2) > 180-ANGLE_THRESH))
                        continue;

#ifdef DEBUG_VIS
                    cv::circle(image_stack, this->keypoints[t_pcl2.at(i)].pt, 3, cv::Scalar(255,255,0), -1);
                    cv::circle(image_stack, this->keypoints[t_pcl2.at(j)].pt, 3, cv::Scalar(255,255,0), -1);
                    cv::circle(image_stack, this->keypoints[t_pcl2.at(k)].pt, 3, cv::Scalar(255,255,0), -1);

                    cv::circle(image_stack, other.keypoints[o_pcl2.at(i)].pt + cv::Point2f(this->image.size().width,0), 3, cv::Scalar(255,255,0), -1);
                    cv::circle(image_stack, other.keypoints[o_pcl2.at(j)].pt + cv::Point2f(this->image.size().width,0), 3, cv::Scalar(255,255,0), -1);
                    cv::circle(image_stack, other.keypoints[o_pcl2.at(k)].pt + cv::Point2f(this->image.size().width,0), 3, cv::Scalar(255,255,0), -1);

                    cv::line(image_stack, this->keypoints[t_pcl2.at(i)].pt, other.keypoints[o_pcl2.at(i)].pt + cv::Point2f(this->image.size().width,0), cv::Scalar(0,255,255), 2);
                    cv::line(image_stack, this->keypoints[t_pcl2.at(j)].pt, other.keypoints[o_pcl2.at(j)].pt + cv::Point2f(this->image.size().width,0), cv::Scalar(0,255,255), 2);
                    cv::line(image_stack, this->keypoints[t_pcl2.at(k)].pt, other.keypoints[o_pcl2.at(k)].pt + cv::Point2f(this->image.size().width,0), cv::Scalar(0,255,255), 2);
#endif
                    o_pcl.push_back(lpt1);
                    o_pcl.push_back(lpt2);
                    o_pcl.push_back(lpt3);

                    t_pcl.push_back(npt1);
                    t_pcl.push_back(npt2);
                    t_pcl.push_back(npt3);
                    break;
                }
                if (t_pcl.size() != 0)
                    break;
            }
            if (t_pcl.size() != 0)
                break;
        }
    }

    if (t_pcl.points.size() == 3) {
        // Estimate the rigid transform that maps this aspect onto the other
        // from the three consistent point pairs.
        Eigen::Matrix4f t;
        pcl::registration::TransformationEstimationSVD<PointType, PointType> te_svd;
        te_svd.estimateRigidTransformation(t_pcl, o_pcl, t);

#ifdef DEBUG_VIS
        cv::namedWindow("correspondences");
        cv::imshow("correspondences", image_stack);
#endif
        transform = t;
        return 1;
    }
    return DBL_MAX;
}

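/**
 * Brute-force nearest-neighbour search over the model descriptors, restricted
 * to keypoints that have a valid 3-D point. Applies the usual ratio test: the
 * best match is accepted only if its distance is below 0.6 times the distance
 * of the second-best match; otherwise -1 is returned.
 */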
int ObjectAspect::naiveNearestNeighbor(const float* vec,
                                       const std::vector<cv::KeyPoint>& model_keypoints,
                                       const float* model_descriptors, const std::map<int,int>& imageMap2D3D) {

    const int LENGTH = 64; // SURF descriptor length (extended = false)
    int neighbor = -1;
    double d = 0.;
    double dist1 = DBL_MAX, dist2 = DBL_MAX;

    for (size_t i = 0; i < model_keypoints.size(); i++) {
        // Skip keypoints without a valid 3-D point.
        if (imageMap2D3D.find(i) == imageMap2D3D.end())
            continue;

        const float* mvec = &model_descriptors[i*LENGTH];
        d = compareSURFDescriptors(vec, mvec, dist2, LENGTH);
        if (d < dist1) {
            dist2 = dist1;
            dist1 = d;
            neighbor = i;
        } else if (d < dist2)
            dist2 = d;
    }
    // Lowe-style ratio test between best and second-best match.
    if (dist1 < 0.6*dist2)
        return neighbor;
    return -1;
}

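/**
 * Squared Euclidean distance between two SURF descriptors, processed four
 * components at a time. Accumulation stops early once the partial sum exceeds
 * `best`, since such a candidate can no longer win.
 */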
double
ObjectAspect::compareSURFDescriptors(const float* d1, const float* d2, double best, int length) {
    double total_cost = 0;
    assert(length % 4 == 0);
    for (int i = 0; i < length; i += 4) {
        double t0 = d1[i  ] - d2[i  ];
        double t1 = d1[i+1] - d2[i+1];
        double t2 = d1[i+2] - d2[i+2];
        double t3 = d1[i+3] - d2[i+3];
        total_cost += t0*t0 + t1*t1 + t2*t2 + t3*t3;
        if (total_cost > best)
            break;
    }

    return total_cost;
}

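/**
 * For every keypoint of this aspect that has a valid 3-D point, finds the
 * nearest descriptor in `other` and stores the accepted index pair
 * (this index, other index) in `ptpairs`.
 */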
void ObjectAspect::findCorrespondences(const ObjectAspect& other, std::vector<cv::Point2i>& ptpairs) {
    const std::vector<cv::KeyPoint>& imageKeypoints = other.keypoints;
    const std::vector<float>& imageDescriptors = other.descriptors;
    const std::map<int,int>& imageMap2D3D = other.map2D3D;
    ptpairs.clear();

    for (size_t i = 0; i < keypoints.size(); i++) {
        if (map2D3D.find(i) == map2D3D.end())
            continue;

        const float* mvec = &descriptors[i*64]; // 64 floats per SURF descriptor

        int nearest_neighbor = naiveNearestNeighbor(mvec,
                                                    imageKeypoints,
                                                    &imageDescriptors[0],
                                                    imageMap2D3D);
        if (nearest_neighbor >= 0) {
            ptpairs.push_back(cv::Point2i(i, nearest_neighbor));
        }
    }
}

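/**
 * Draws the detected keypoints into `image`: a blue circle scaled by the
 * keypoint response for every keypoint, a small black dot at its position,
 * and a green circle on top when the keypoint has a valid 3-D point.
 */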
void ObjectAspect::plotKeypoints(cv::Mat &image) {
    const int radius = 1, thickness = 2;
    const cv::Scalar color(0, 0, 0, 0);
    for (size_t i = 0; i < keypoints.size(); ++i) {
        cv::KeyPoint kp = keypoints[i];
        cv::circle(image, kp.pt, kp.response/1000, cv::Scalar(255, 0, 0, 0));
        cv::circle(image, kp.pt, radius, color, thickness);
        if (map2D3D.find(i) != map2D3D.end())
            cv::circle(image, kp.pt, kp.response/1000, cv::Scalar(0, 255, 0, 0));
    }
}

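/**
 * Loads a binary PCD file. Older PCD writers padded the header with null
 * bytes so that the data started at a 4096-byte offset; when such padding is
 * detected, the file is rewritten without it into a temporary file, which is
 * then loaded and removed. Returns 0 on success, -1 on error.
 */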
int read_binary_PCL(const std::string& filename, PointCloud::Ptr cloud) {
    std::ifstream in_file(filename.c_str(), std::ios::in | std::ios::binary);
    std::string token("DATA binary\n");

    const size_t BUFFER_SIZE = 4096;
    char buffer[BUFFER_SIZE];
    in_file.read(buffer, BUFFER_SIZE);
    if (in_file.gcount() < (std::streamsize)BUFFER_SIZE) {
        std::cerr << "could not read whole file" << std::endl;
        return -1;
    }

    // Find the end of the header and check whether it is followed by a long
    // run of null bytes (old PCD binary format with a padded header).
    bool null_after_header = false;
    size_t matched_chars = 0;
    size_t header_length = 0;
    for (size_t i = 0; i < BUFFER_SIZE; i++) {
        if (buffer[i] == token[matched_chars])
            matched_chars++;
        else
            matched_chars = 0;

        if (matched_chars == token.length()) {
            header_length = i;
            size_t num_nulls = 0;
            for (size_t j = header_length+1; j < BUFFER_SIZE; j++) {
                if (buffer[j] == '\0')
                    num_nulls++;
                else
                    break;
            }
            null_after_header = num_nulls > 10;
            if (null_after_header) {
                std::cout << "null bytes found after header, converting from old PCD binary format" << std::endl;
            }
            break;
        }
    }

    std::string tmp_filename;
    int temp_file_fd;
    if (null_after_header) {
        char tmp_name[] = "/tmp/pcd_file_XXXXXX";

        temp_file_fd = mkstemp(tmp_name);
        if (temp_file_fd < 0) {
            std::cerr << "could not create temporary file" << std::endl;
            return -1;
        }
        tmp_filename = tmp_name;

        // Write the header followed directly by the data, which starts at the
        // 4096-byte offset in the original file.
        std::ofstream tmp_file(tmp_filename.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
        tmp_file.write(buffer, header_length+1);

        in_file.seekg(4096, std::ios::beg);
        while (in_file.good()) {
            in_file.read(buffer, BUFFER_SIZE);
            tmp_file.write(buffer, in_file.gcount());
        }
        in_file.close();
        tmp_file.close();
        close(temp_file_fd); // release the descriptor opened by mkstemp
        pcl::io::loadPCDFile(tmp_filename, *cloud);
        remove(tmp_filename.c_str());
    } else {
        in_file.close();

        pcl::io::loadPCDFile(filename, *cloud);
    }

    return 0;
}

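/**
 * Builds the aspect from a PCD file on disk: loads the cloud, extracts its
 * RGB image via pcl::toROSMsg and cv_bridge, and runs feature extraction on
 * the image/cloud pair.
 */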
void ObjectAspect::fromFile(const std::string &filename) {
    PointCloud::Ptr cloud = PointCloud::Ptr(new PointCloud);
    read_binary_PCL(filename, cloud);

    // Extract the RGB image from the organized point cloud.
    sensor_msgs::ImagePtr image_(new sensor_msgs::Image);
    pcl::toROSMsg(*cloud, *image_);
    cv_bridge::CvImagePtr img_ = cv_bridge::toCvCopy(*image_, "bgr8");

    ROS_INFO("loading model aspect: %s", filename.c_str());

    calculate(img_->image, cloud);
    points = cloud;
}

RecognitionModel::RecognitionModel()
{
}

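/**
 * Parses a model's meta.xml and extracts its name, type and face count.
 * A minimal file accepted by this parser looks like (element values are
 * illustrative only):
 *
 *   <model>
 *     <name>mug</name>
 *     <type>kinect_pcl</type>
 *     <faces>4</faces>
 *   </model>
 *
 * Returns false if the file cannot be loaded or any element is missing.
 */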
bool loadMetaFile(const std::string& file_name, std::string& model_name, std::string& model_type, int& face_count) {
    TiXmlDocument doc(file_name.c_str());
    bool loaded = doc.LoadFile();
    if (!loaded)
        return false;

    TiXmlHandle hdoc(&doc);
    TiXmlHandle model_handle = hdoc.FirstChildElement("model");
    TiXmlElement *model_element = model_handle.Element();
    if (model_element == NULL)
        return false;
    TiXmlElement *name_element = model_handle.FirstChildElement("name").Element();
    if (name_element == NULL)
        return false;
    model_name = name_element->GetText();
    TiXmlElement *type_element = model_handle.FirstChildElement("type").Element();
    if (type_element == NULL)
        return false;
    model_type = type_element->GetText();
    TiXmlElement *faces_element = model_handle.FirstChildElement("faces").Element();
    if (faces_element == NULL)
        return false;
    std::string faces_str = faces_element->GetText();
    face_count = boost::lexical_cast<int>(faces_str);

    return true;
}

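/**
 * Returns a C string for a boost::filesystem::path across Boost versions.
 * Caution: the Boost < 1.46 branch returns a pointer into a temporary
 * std::string, so the result must be consumed immediately.
 */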
const char *boost_path_to_cstr(const boost::filesystem::path& path) {
#if BOOST_VERSION >= 104600
    return path.c_str();
#else
    return path.native_file_string().c_str();
#endif
}

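/**
 * Loads a model from a directory that contains a meta.xml file and one
 * dense_face_*.pcd file per aspect. The model is rejected if the directory or
 * meta.xml is missing, the type is not "kinect_pcl", or fewer faces are found
 * than meta.xml declares.
 */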
bool RecognitionModel::loadFromPath(const std::string &path) {
    boost::filesystem::path dir(path);

    if (!boost::filesystem::is_directory(dir)) {
        ROS_ERROR("Not a directory: %s", boost_path_to_cstr(dir));
        return false;
    }

    boost::filesystem::path meta_path(dir / "meta.xml");
    if (!boost::filesystem::exists(meta_path)) {
        ROS_ERROR("No meta.xml file found in %s", boost_path_to_cstr(dir));
        return false;
    }

    int face_count = 0;
    std::string model_type = "";
    bool loaded = loadMetaFile(boost_path_to_cstr(meta_path), model_name, model_type, face_count);

    if (!loaded || model_name.empty() || (face_count == 0) || (model_type != "kinect_pcl")) {
        ROS_ERROR("invalid meta.xml in %s", boost_path_to_cstr(dir));
        return false;
    }

    // Load every dense_face_*.pcd file in the directory as one object aspect.
    for (boost::filesystem::directory_iterator it(dir); it != boost::filesystem::directory_iterator(); ++it)
    {
        boost::filesystem::path pref = *it;
        const static std::string FACE_PREFIX = "dense_face_";

#if BOOST_VERSION >= 104600
        if ((pref.filename().string().substr(0, FACE_PREFIX.length()) == FACE_PREFIX) && (pref.extension() == ".pcd")) {
#else
        if ((pref.filename().substr(0, FACE_PREFIX.length()) == FACE_PREFIX) && (pref.extension() == ".pcd")) {
#endif
            aspects.push_back(new ObjectAspect());
            ObjectAspect* aspect = aspects.back();
            aspect->fromFile(pref.string());
            if (aspects.size() > static_cast<size_t>(face_count)) {
                ROS_INFO("found more faces than specified in meta.xml. please check %s", boost_path_to_cstr(meta_path));
                break;
            }
        }
    }

    if (aspects.size() < static_cast<size_t>(face_count)) {
        ROS_ERROR("meta.xml specified %i faces, but only %i were found.", face_count, (int)aspects.size());
        return false;
    }
    if (aspects.empty()) {
        ROS_ERROR("no model files in path %s", path.c_str());
        return false;
    }
    return true;
}

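/**
 * Matches a scene aspect against every stored model aspect and keeps the best
 * (lowest-error) transform; `other.match` is set to the winning model aspect.
 * Returns true if at least one aspect produced a valid match.
 *
 * Typical use (hypothetical caller, not part of this file):
 *
 *   RecognitionModel model;
 *   if (model.loadFromPath("/path/to/model_dir")) {
 *       ObjectAspect scene;
 *       scene.calculate(rgb_image, scene_cloud);   // rgb_image / scene_cloud from the sensor
 *       Eigen::Matrix4f model2scene;
 *       if (model.matchAspects(scene, model2scene)) {
 *           // model2scene maps model-aspect coordinates into the scene frame
 *       }
 *   }
 */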
bool RecognitionModel::matchAspects(ObjectAspect &other, Eigen::Matrix4f &model2scene) {
    double minerror = DBL_MAX;

    for (size_t i = 0; i < aspects.size(); i++) {
        ObjectAspect* model_aspect = aspects[i];
        Eigen::Matrix4f trafo;
        double error = model_aspect->matchAspects(other, trafo);
        if (error < minerror) {
            model2scene = trafo;
            other.match = model_aspect;
            minerror = error;
            ROS_INFO("HIT %d, minerror: %f", (int)i, minerror);
        }
    }

    return minerror != DBL_MAX;
}