#include "rtabmap/core/util3d_features.h"

#include "rtabmap/core/util2d.h"
#include "rtabmap/core/util3d.h"
#include "rtabmap/core/util3d_transforms.h"
#include "rtabmap/core/util3d_correspondences.h"
#include "rtabmap/core/util3d_motion_estimation.h"

#include "rtabmap/core/EpipolarGeometry.h"

#include <rtabmap/utilite/ULogger.h>
#include <rtabmap/utilite/UConversion.h>
#include <rtabmap/utilite/UMath.h>
#include <rtabmap/utilite/UStl.h>

#include <opencv2/video/tracking.hpp>

#include <algorithm> // std::sort
#include <limits>    // std::numeric_limits

namespace rtabmap
{

namespace util3d
{

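/**
 * Project 2D keypoints to 3D using a registered depth image and a single
 * camera model. Convenience overload of the multi-camera version below.
 */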
std::vector<cv::Point3f> generateKeypoints3DDepth(
		const std::vector<cv::KeyPoint> & keypoints,
		const cv::Mat & depth,
		const CameraModel & cameraModel,
		float minDepth,
		float maxDepth)
{
	UASSERT(cameraModel.isValidForProjection());
	std::vector<CameraModel> models;
	models.push_back(cameraModel);
	return generateKeypoints3DDepth(keypoints, depth, models, minDepth, maxDepth);
}

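/**
 * Project 2D keypoints to 3D using a registered depth image. When several
 * camera models are given, the depth image is expected to be the horizontal
 * concatenation of one sub-image per camera, all of equal width. Keypoints
 * with no valid depth or a depth outside [minDepth, maxDepth] are set to NaN;
 * valid points are transformed by their camera's local transform.
 */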
std::vector<cv::Point3f> generateKeypoints3DDepth(
		const std::vector<cv::KeyPoint> & keypoints,
		const cv::Mat & depth,
		const std::vector<CameraModel> & cameraModels,
		float minDepth,
		float maxDepth)
{
	UASSERT(!depth.empty() && (depth.type() == CV_32FC1 || depth.type() == CV_16UC1));
	UASSERT(cameraModels.size());
	std::vector<cv::Point3f> keypoints3d;
	if(!depth.empty())
	{
		// With multiple cameras, the depth image must split evenly into
		// sub-images of equal width, one per camera.
		UASSERT(int((depth.cols/cameraModels.size())*cameraModels.size()) == depth.cols);
		float subImageWidth = depth.cols/cameraModels.size();
		keypoints3d.resize(keypoints.size());
		// Scale factors from RGB coordinates to depth coordinates, in case
		// the depth image has a lower resolution than the RGB image.
		float rgbToDepthFactorX = 1.0f/(cameraModels[0].imageWidth()>0?cameraModels[0].imageWidth()/subImageWidth:1);
		float rgbToDepthFactorY = 1.0f/(cameraModels[0].imageHeight()>0?cameraModels[0].imageHeight()/depth.rows:1);
		float bad_point = std::numeric_limits<float>::quiet_NaN();
		for(unsigned int i=0; i<keypoints.size(); ++i)
		{
			float x = keypoints[i].pt.x*rgbToDepthFactorX;
			float y = keypoints[i].pt.y*rgbToDepthFactorY;
			int cameraIndex = int(x / subImageWidth);
			UASSERT_MSG(cameraIndex >= 0 && cameraIndex < (int)cameraModels.size(),
					uFormat("cameraIndex=%d, models=%d, kpt.x=%f, subImageWidth=%f (Camera model image width=%d)",
							cameraIndex, (int)cameraModels.size(), keypoints[i].pt.x, subImageWidth, cameraModels[0].imageWidth()).c_str());

			// Project the keypoint using the intrinsics of its sub-camera,
			// rescaled to the depth image resolution.
			pcl::PointXYZ ptXYZ = util3d::projectDepthTo3D(
					cameraModels.size()==1?depth:cv::Mat(depth, cv::Range::all(), cv::Range(subImageWidth*cameraIndex,subImageWidth*(cameraIndex+1))),
					x-subImageWidth*cameraIndex,
					y,
					cameraModels.at(cameraIndex).cx()*rgbToDepthFactorX,
					cameraModels.at(cameraIndex).cy()*rgbToDepthFactorY,
					cameraModels.at(cameraIndex).fx()*rgbToDepthFactorX,
					cameraModels.at(cameraIndex).fy()*rgbToDepthFactorY,
					true);

			cv::Point3f pt(bad_point, bad_point, bad_point);
			if(pcl::isFinite(ptXYZ) &&
				(minDepth < 0.0f || ptXYZ.z > minDepth) &&
				(maxDepth <= 0.0f || ptXYZ.z <= maxDepth))
			{
				pt = cv::Point3f(ptXYZ.x, ptXYZ.y, ptXYZ.z);
				if(!cameraModels.at(cameraIndex).localTransform().isNull() &&
				   !cameraModels.at(cameraIndex).localTransform().isIdentity())
				{
					pt = util3d::transformPoint(pt, cameraModels.at(cameraIndex).localTransform());
				}
			}
			keypoints3d.at(i) = pt;
		}
	}
	return keypoints3d;
}

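/**
 * Project 2D keypoints to 3D using a disparity image and a stereo camera
 * model. Keypoints with an invalid disparity or a depth outside
 * [minDepth, maxDepth] are set to NaN; valid points are transformed by the
 * left camera's local transform.
 */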
std::vector<cv::Point3f> generateKeypoints3DDisparity(
		const std::vector<cv::KeyPoint> & keypoints,
		const cv::Mat & disparity,
		const StereoCameraModel & stereoCameraModel,
		float minDepth,
		float maxDepth)
{
	UASSERT(!disparity.empty() && (disparity.type() == CV_16SC1 || disparity.type() == CV_32F));
	UASSERT(stereoCameraModel.isValidForProjection());
	std::vector<cv::Point3f> keypoints3d;
	keypoints3d.resize(keypoints.size());
	float bad_point = std::numeric_limits<float>::quiet_NaN();
	for(unsigned int i=0; i!=keypoints.size(); ++i)
	{
		cv::Point3f tmpPt = util3d::projectDisparityTo3D(
				keypoints[i].pt,
				disparity,
				stereoCameraModel);

		cv::Point3f pt(bad_point, bad_point, bad_point);
		if(util3d::isFinite(tmpPt) &&
			(minDepth < 0.0f || tmpPt.z > minDepth) &&
			(maxDepth <= 0.0f || tmpPt.z <= maxDepth))
		{
			pt = tmpPt;
			if(!stereoCameraModel.left().localTransform().isNull() &&
			   !stereoCameraModel.left().localTransform().isIdentity())
			{
				pt = util3d::transformPoint(pt, stereoCameraModel.left().localTransform());
			}
		}
		keypoints3d.at(i) = pt;
	}
	return keypoints3d;
}

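/**
 * Triangulate 3D points from matched left/right corners of a calibrated
 * stereo pair. The optional mask (e.g., the status vector of an optical flow
 * step) selects which correspondences are used. Corners with zero disparity,
 * a non-finite projection or a depth outside [minDepth, maxDepth] yield NaN.
 */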
std::vector<cv::Point3f> generateKeypoints3DStereo(
		const std::vector<cv::Point2f> & leftCorners,
		const std::vector<cv::Point2f> & rightCorners,
		const StereoCameraModel & model,
		const std::vector<unsigned char> & mask,
		float minDepth,
		float maxDepth)
{
	UASSERT(leftCorners.size() == rightCorners.size());
	UASSERT(mask.size() == 0 || leftCorners.size() == mask.size());
	UASSERT(model.left().fx() > 0.0f && model.baseline() > 0.0f);

	std::vector<cv::Point3f> keypoints3d;
	keypoints3d.resize(leftCorners.size());
	float bad_point = std::numeric_limits<float>::quiet_NaN();
	for(unsigned int i=0; i<leftCorners.size(); ++i)
	{
		cv::Point3f pt(bad_point, bad_point, bad_point);
		if(mask.empty() || mask[i])
		{
			// Disparity of a rectified pair: horizontal shift between the
			// left and right observations of the same feature.
			float disparity = leftCorners[i].x - rightCorners[i].x;
			if(disparity != 0.0f)
			{
				cv::Point3f tmpPt = util3d::projectDisparityTo3D(
						leftCorners[i],
						disparity,
						model);

				if(util3d::isFinite(tmpPt) &&
					(minDepth < 0.0f || tmpPt.z > minDepth) &&
					(maxDepth <= 0.0f || tmpPt.z <= maxDepth))
				{
					pt = tmpPt;
					if(!model.localTransform().isNull() &&
					   !model.localTransform().isIdentity())
					{
						pt = util3d::transformPoint(pt, model.localTransform());
					}
				}
			}
		}

		keypoints3d.at(i) = pt;
	}
	return keypoints3d;
}

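/**
 * Triangulate 3D positions of visual words matched between two monocular
 * frames. A fundamental matrix is estimated from the matched keypoints, the
 * epipolar inliers are triangulated using the projection matrix recovered
 * from the essential matrix, and, since monocular triangulation is only
 * defined up to scale, the result is rescaled against refGuess3D (known 3D
 * positions of some reference words) when provided. The camera transform is
 * then refined with PnP RANSAC and written back to cameraTransform; if
 * cameraTransform is not null on input, it is used as a motion guess instead
 * of decomposing the essential matrix. The median-based error variance of
 * the scale fit is returned through varianceOut when non-null.
 */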
std::map<int, cv::Point3f> generateWords3DMono(
		const std::map<int, cv::KeyPoint> & refWords,
		const std::map<int, cv::KeyPoint> & nextWords,
		const CameraModel & cameraModel,
		Transform & cameraTransform,
		int pnpIterations,
		float pnpReprojError,
		int pnpFlags,
		int pnpRefineIterations,
		float ransacParam1,
		float ransacParam2,
		const std::map<int, cv::Point3f> & refGuess3D,
		double * varianceOut)
{
	UASSERT(cameraModel.isValidForProjection());
	std::map<int, cv::Point3f> words3D;
	std::list<std::pair<int, std::pair<cv::KeyPoint, cv::KeyPoint> > > pairs;
	int pairsFound = EpipolarGeometry::findPairs(refWords, nextWords, pairs);
	UDEBUG("pairsFound=%d/%d", pairsFound, int(refWords.size()>nextWords.size()?refWords.size():nextWords.size()));
	if(pairsFound > 8)
	{
		// Estimate the fundamental matrix from the matched words (RANSAC);
		// the status vector flags the matches consistent with the epipolar
		// constraint.
		std::vector<unsigned char> status;
		cv::Mat F = EpipolarGeometry::findFFromWords(pairs, status, ransacParam1, ransacParam2);
		if(!F.empty())
		{
			// Keep only the epipolar inliers.
			int oi = 0;
			UASSERT(status.size() == pairs.size());
			std::list<std::pair<int, std::pair<cv::KeyPoint, cv::KeyPoint> > >::iterator iter=pairs.begin();
			std::vector<cv::Point2f> refCorners(status.size());
			std::vector<cv::Point2f> newCorners(status.size());
			std::vector<int> indexes(status.size());
			for(unsigned int i=0; i<status.size(); ++i)
			{
				if(status[i])
				{
					refCorners[oi] = iter->second.first.pt;
					newCorners[oi] = iter->second.second.pt;
					indexes[oi] = iter->first;
					++oi;
				}
				++iter;
			}
			refCorners.resize(oi);
			newCorners.resize(oi);
			indexes.resize(oi);

			UDEBUG("inliers=%d/%d", oi, (int)pairs.size());
			if(oi > 3)
			{
				// Refine the corners so that they satisfy the epipolar
				// constraint exactly before triangulation.
				std::vector<cv::Point2f> refCornersRefined;
				std::vector<cv::Point2f> newCornersRefined;
				cv::correctMatches(F, refCorners, newCorners, refCornersRefined, newCornersRefined);
				refCorners = refCornersRefined;
				newCorners = newCornersRefined;

				cv::Mat x(3, (int)refCorners.size(), CV_64FC1);
				cv::Mat xp(3, (int)refCorners.size(), CV_64FC1);
				for(unsigned int i=0; i<refCorners.size(); ++i)
				{
					x.at<double>(0, i) = refCorners[i].x;
					x.at<double>(1, i) = refCorners[i].y;
					x.at<double>(2, i) = 1;

					xp.at<double>(0, i) = newCorners[i].x;
					xp.at<double>(1, i) = newCorners[i].y;
					xp.at<double>(2, i) = 1;
				}

				// Essential matrix E = K^T * F * K, and normalized image
				// coordinates (intrinsics removed).
				cv::Mat K = cameraModel.K();
				cv::Mat Kinv = K.inv();
				cv::Mat E = K.t()*F*K;
				cv::Mat x_norm = Kinv * x;
				cv::Mat xp_norm = Kinv * xp;
				x_norm = x_norm.rowRange(0,2);
				xp_norm = xp_norm.rowRange(0,2);

				// Recover the projection matrix of the second camera from E,
				// the first camera being taken as P0 = [I|0].
				cv::Mat P = EpipolarGeometry::findPFromE(E, x_norm, xp_norm);
				if(!P.empty())
				{
					cv::Mat P0 = cv::Mat::zeros(3, 4, CV_64FC1);
					P0.at<double>(0,0) = 1;
					P0.at<double>(1,1) = 1;
					P0.at<double>(2,2) = 1;

					bool useCameraTransformGuess = !cameraTransform.isNull();

					// If a motion guess is provided, build the second
					// projection matrix from it instead of decomposing E.
					if(useCameraTransformGuess)
					{
						Transform t = (cameraModel.localTransform().inverse()*cameraTransform*cameraModel.localTransform()).inverse();
						P = (cv::Mat_<double>(3,4) <<
								(double)t.r11(), (double)t.r12(), (double)t.r13(), (double)t.x(),
								(double)t.r21(), (double)t.r22(), (double)t.r23(), (double)t.y(),
								(double)t.r31(), (double)t.r32(), (double)t.r33(), (double)t.z());
					}

					// Triangulate the correspondences into homogeneous 4D points.
					cv::Mat pts4D;
					cv::triangulatePoints(P0, P, x_norm, xp_norm, pts4D);

					UASSERT((int)indexes.size() == pts4D.cols && pts4D.rows == 4);
					for(unsigned int i=0; i<indexes.size(); ++i)
					{
						// Normalize homogeneous coordinates and keep only the
						// points triangulated in front of the camera (z > 0).
						pts4D.col(i) /= pts4D.at<double>(3,i);
						if(pts4D.at<double>(2,i) > 0)
						{
							words3D.insert(std::make_pair(indexes[i], util3d::transformPoint(cv::Point3f(pts4D.at<double>(0,i), pts4D.at<double>(1,i), pts4D.at<double>(2,i)), cameraModel.localTransform())));
						}
					}

					UDEBUG("ref guess=%d", (int)refGuess3D.size());
					if(refGuess3D.size())
					{
						// Match the triangulated words against the reference
						// guess to recover the absolute scale: monocular
						// triangulation is only defined up to scale.
						std::vector<cv::Point3f> inliersRef;
						std::vector<cv::Point3f> inliersRefGuess;
						util3d::findCorrespondences(
								words3D,
								refGuess3D,
								inliersRef,
								inliersRefGuess,
								0);

						if(inliersRef.size())
						{
							float scale = 1.0f;
							float variance = 1.0f;
							if(!useCameraTransformGuess)
							{
								// Try the scale suggested by each correspondence
								// and keep the one giving the lowest variance.
								std::multimap<float, float> scales; // <variance, scale>
								for(unsigned int i=0; i<inliersRef.size(); ++i)
								{
									// Scale suggested by this correspondence.
									float s = inliersRefGuess.at(i).x/inliersRef.at(i).x;
									std::vector<float> errorSqrdDists(inliersRef.size());
									for(unsigned int j=0; j<inliersRef.size(); ++j)
									{
										cv::Point3f refPt = inliersRef.at(j);
										refPt.x *= s;
										refPt.y *= s;
										refPt.z *= s;
										const cv::Point3f & newPt = inliersRefGuess.at(j);
										errorSqrdDists[j] = uNormSquared(refPt.x-newPt.x, refPt.y-newPt.y, refPt.z-newPt.z);
									}
									std::sort(errorSqrdDists.begin(), errorSqrdDists.end());
									double median_error_sqr = (double)errorSqrdDists[errorSqrdDists.size() >> 1];
									// 2.1981 = 1.4826^2, the usual factor turning a
									// median of squared errors into a Gaussian variance.
									float var = 2.1981 * median_error_sqr;
									scales.insert(std::make_pair(var, s));
								}
								scale = scales.begin()->second;
								variance = scales.begin()->first;
							}
							else
							{
								// Scale is already fixed by the motion guess;
								// only estimate the error variance.
								std::vector<float> errorSqrdDists(inliersRef.size());
								for(unsigned int j=0; j<inliersRef.size(); ++j)
								{
									const cv::Point3f & refPt = inliersRef.at(j);
									const cv::Point3f & newPt = inliersRefGuess.at(j);
									errorSqrdDists[j] = uNormSquared(refPt.x-newPt.x, refPt.y-newPt.y, refPt.z-newPt.z);
								}
								std::sort(errorSqrdDists.begin(), errorSqrdDists.end());
								double median_error_sqr = (double)errorSqrdDists[errorSqrdDists.size() >> 1];
								variance = 2.1981 * median_error_sqr;
							}

							UDEBUG("scale used = %f (variance=%f)", scale, variance);
							if(varianceOut)
							{
								*varianceOut = variance;
							}

							if(!useCameraTransformGuess)
							{
								// Apply the recovered scale to the triangulated
								// words, then refine the camera transform with
								// PnP RANSAC on the scaled 3D-2D correspondences.
								std::vector<cv::Point3f> objectPoints(indexes.size());
								std::vector<cv::Point2f> imagePoints(indexes.size());
								int oi2=0;
								UASSERT(indexes.size() == newCorners.size());
								for(unsigned int i=0; i<indexes.size(); ++i)
								{
									std::map<int, cv::Point3f>::iterator iter = words3D.find(indexes[i]);
									if(iter!=words3D.end() && util3d::isFinite(iter->second))
									{
										iter->second.x *= scale;
										iter->second.y *= scale;
										iter->second.z *= scale;
										objectPoints[oi2].x = iter->second.x;
										objectPoints[oi2].y = iter->second.y;
										objectPoints[oi2].z = iter->second.z;
										imagePoints[oi2] = newCorners[i];
										++oi2;
									}
								}
								objectPoints.resize(oi2);
								imagePoints.resize(oi2);

								// Initial pose guess: inverse of the camera's local transform.
								Transform guess = cameraModel.localTransform().inverse();
								cv::Mat R = (cv::Mat_<double>(3,3) <<
										(double)guess.r11(), (double)guess.r12(), (double)guess.r13(),
										(double)guess.r21(), (double)guess.r22(), (double)guess.r23(),
										(double)guess.r31(), (double)guess.r32(), (double)guess.r33());
								cv::Mat rvec(1,3, CV_64FC1);
								cv::Rodrigues(R, rvec);
								cv::Mat tvec = (cv::Mat_<double>(1,3) << (double)guess.x(), (double)guess.y(), (double)guess.z());
								std::vector<int> inliersV;
								util3d::solvePnPRansac(
										objectPoints,
										imagePoints,
										K,
										cv::Mat(),
										rvec,
										tvec,
										true,
										pnpIterations,
										pnpReprojError,
										0,
										inliersV,
										pnpFlags,
										pnpRefineIterations);

								UDEBUG("PnP inliers = %d / %d", (int)inliersV.size(), (int)objectPoints.size());

								if(inliersV.size())
								{
									cv::Rodrigues(rvec, R);
									Transform pnp(R.at<double>(0,0), R.at<double>(0,1), R.at<double>(0,2), tvec.at<double>(0),
												  R.at<double>(1,0), R.at<double>(1,1), R.at<double>(1,2), tvec.at<double>(1),
												  R.at<double>(2,0), R.at<double>(2,1), R.at<double>(2,2), tvec.at<double>(2));

									cameraTransform = (cameraModel.localTransform() * pnp).inverse();
								}
								else
								{
									UWARN("No inliers after PnP!");
								}
							}
						}
						else
						{
							UWARN("Cannot compute the scale: no correspondences between the generated reference words and the reference guess");
						}
					}
					else if(!useCameraTransformGuess)
					{
						// No 3D guess available: recover R and t (up to scale)
						// directly from the projection matrix found above.
						cv::Mat R, T;
						EpipolarGeometry::findRTFromP(P, R, T);

						Transform t(R.at<double>(0,0), R.at<double>(0,1), R.at<double>(0,2), T.at<double>(0),
									R.at<double>(1,0), R.at<double>(1,1), R.at<double>(1,2), T.at<double>(1),
									R.at<double>(2,0), R.at<double>(2,1), R.at<double>(2,2), T.at<double>(2));

						cameraTransform = (cameraModel.localTransform() * t).inverse() * cameraModel.localTransform();
					}
				}
			}
		}
	}
	UDEBUG("wordsSet=%d / %d", (int)words3D.size(), (int)refWords.size());

	return words3D;
}

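/**
 * Pair each word id with the keypoint at the same position in the two
 * containers; both are expected to be aligned and of the same size.
 */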
std::multimap<int, cv::KeyPoint> aggregate(
		const std::list<int> & wordIds,
		const std::vector<cv::KeyPoint> & keypoints)
{
	UASSERT(wordIds.size() == keypoints.size());
	std::multimap<int, cv::KeyPoint> words;
	std::vector<cv::KeyPoint>::const_iterator kpIter = keypoints.begin();
	for(std::list<int>::const_iterator iter=wordIds.begin(); iter!=wordIds.end(); ++iter)
	{
		words.insert(std::pair<int, cv::KeyPoint >(*iter, *kpIter));
		++kpIter;
	}
	return words;
}

} // namespace util3d

} // namespace rtabmap