apriltags: TagDetector.cc Source File

Go to the documentation of this file.
00001 #include <algorithm>
00002 #include <cmath>
00003 #include <climits>
00004 #include <map>
00005 #include <vector>
00006 #include <iostream>
00007 
00008 #include <Eigen/Dense>
00009 
00010 #include "AprilTags/Edge.h"
00011 #include "AprilTags/FloatImage.h"
00012 #include "AprilTags/Gaussian.h"
00013 #include "AprilTags/GrayModel.h"
00014 #include "AprilTags/GLine2D.h"
00015 #include "AprilTags/GLineSegment2D.h"
00016 #include "AprilTags/Gridder.h"
00017 #include "AprilTags/Homography33.h"
00018 #include "AprilTags/MathUtil.h"
00019 #include "AprilTags/Quad.h"
00020 #include "AprilTags/Segment.h"
00021 #include "AprilTags/TagFamily.h"
00022 #include "AprilTags/UnionFindSimple.h"
00023 #include "AprilTags/XYWeight.h"
00024 
00025 #include "AprilTags/TagDetector.h"
00026 
00027 //#define DEBUG_APRIL
00028 
00029 #ifdef DEBUG_APRIL
00030 #include <opencv/cv.h>
00031 #include <opencv/highgui.h>
00032 #endif
00033 
00034 using namespace std;
00035 
00036 namespace AprilTags {
00037 
00038   std::vector<TagDetection> TagDetector::extractTags(const cv::Mat& image) {
00039 
00040     // convert to internal AprilTags image (todo: slow, change internally to OpenCV)
00041     int width = image.cols;
00042     int height = image.rows;
00043     AprilTags::FloatImage fimOrig(width, height);
00044     int i = 0;
00045     for (int y=0; y<height; y++) {
00046       for (int x=0; x<width; x++) {
00047         fimOrig.set(x, y, image.data[i]/255.);
00048         i++;
00049       }
00050     }
00051     std::pair<int,int> opticalCenter(width/2, height/2);
00052 
00053 #ifdef DEBUG_APRIL
00054 #if 0
00055   { // debug - write
00056     int height_ = fimOrig.getHeight();
00057     int width_  = fimOrig.getWidth();
00058     cv::Mat image(height_, width_, CV_8UC3);
00059     {
00060       for (int y=0; y<height_; y++) {
00061         for (int x=0; x<width_; x++) {
00062           cv::Vec3b v;
00063           //        float vf = fimMag.get(x,y);
00064           float vf = fimOrig.get(x,y);
00065           int val = (int)(vf * 255.);
00066           if ((val & 0xffff00) != 0) {printf("problem... %i\n", val);}
00067           for (int k=0; k<3; k++) {
00068             v(k) = val;
00069           }
00070           image.at<cv::Vec3b>(y, x) = v;
00071         }
00072       }
00073     }
00074     imwrite("out.bmp", image);
00075   }
00076 #endif
00077 #if 0
00078   FloatImage fimOrig = fimOrig_;
00079   { // debug - read
00080 
00081     cv::Mat image = cv::imread("test.bmp");
00082     int height_ = fimOrig.getHeight();
00083     int width_  = fimOrig.getWidth();
00084     {
00085       for (int y=0; y<height_; y++) {
00086         for (int x=0; x<width_; x++) {
00087           cv::Vec3b v = image.at<cv::Vec3b>(y,x);
00088           float val = (float)v(0)/255.;
00089           fimOrig.set(x,y,val);
00090         }
00091       }
00092     }
00093   }
00094 #endif
00095 #endif
00096 
00097   //================================================================
00098   // Step one: preprocess image (convert to grayscale) and low pass if necessary
00099 
00100   FloatImage fim = fimOrig;
00101   
00103 
00110   float sigma = 0;
00111 
00113 
00119   float segSigma = 0.8f;
00120 
00121   if (sigma > 0) {
00122     int filtsz = ((int) max(3.0f, 3*sigma)) | 1;
00123     std::vector<float> filt = Gaussian::makeGaussianFilter(sigma, filtsz);
00124     fim.filterFactoredCentered(filt, filt);
00125   }
00126 
00127   //================================================================
00128   // Step two: Compute the local gradient. We store the direction and magnitude.
00129   // This step is quite sensitve to noise, since a few bad theta estimates will
00130   // break up segments, causing us to miss Quads. It is useful to do a Gaussian
00131   // low pass on this step even if we don't want it for encoding.
00132 
00133   FloatImage fimSeg;
00134   if (segSigma > 0) {
00135     if (segSigma == sigma) {
00136       fimSeg = fim;
00137     } else {
00138       // blur anew
00139       int filtsz = ((int) max(3.0f, 3*segSigma)) | 1;
00140       std::vector<float> filt = Gaussian::makeGaussianFilter(segSigma, filtsz);
00141       fimSeg = fimOrig;
00142       fimSeg.filterFactoredCentered(filt, filt);
00143     }
00144   } else {
00145     fimSeg = fimOrig;
00146   }
00147 
00148   FloatImage fimTheta(fimSeg.getWidth(), fimSeg.getHeight());
00149   FloatImage fimMag(fimSeg.getWidth(), fimSeg.getHeight());
00150   
00151 
00152   #pragma omp parallel for
00153   for (int y = 1; y < fimSeg.getHeight()-1; y++) {
00154     for (int x = 1; x < fimSeg.getWidth()-1; x++) {
00155       float Ix = fimSeg.get(x+1, y) - fimSeg.get(x-1, y);
00156       float Iy = fimSeg.get(x, y+1) - fimSeg.get(x, y-1);
00157 
00158       float mag = Ix*Ix + Iy*Iy;
00159 #if 0 // kaess: fast version, but maybe less accurate?
00160       float theta = MathUtil::fast_atan2(Iy, Ix);
00161 #else
00162       float theta = atan2(Iy, Ix);
00163 #endif
00164       
00165       fimTheta.set(x, y, theta);
00166       fimMag.set(x, y, mag);
00167     }
00168   }
00169 
00170 #ifdef DEBUG_APRIL
00171   int height_ = fimSeg.getHeight();
00172   int width_  = fimSeg.getWidth();
00173   cv::Mat image(height_, width_, CV_8UC3);
00174   {
00175     for (int y=0; y<height_; y++) {
00176       for (int x=0; x<width_; x++) {
00177         cv::Vec3b v;
00178         //        float vf = fimMag.get(x,y);
00179         float vf = fimOrig.get(x,y);
00180         int val = (int)(vf * 255.);
00181         if ((val & 0xffff00) != 0) {printf("problem... %i\n", val);}
00182         for (int k=0; k<3; k++) {
00183           v(k) = val;
00184         }
00185         image.at<cv::Vec3b>(y, x) = v;
00186       }
00187     }
00188   }
00189 #endif
00190 
00191   //================================================================
00192   // Step three. Extract edges by grouping pixels with similar
00193   // thetas together. This is a greedy algorithm: we start with
00194   // the most similar pixels.  We use 4-connectivity.
00195   UnionFindSimple uf(fimSeg.getWidth()*fimSeg.getHeight());
00196   
00197   vector<Edge> edges(width*height*4);
00198   size_t nEdges = 0;
00199 
00200   // Bounds on the thetas assigned to this group. Note that because
00201   // theta is periodic, these are defined such that the average
00202   // value is contained *within* the interval.
00203   { // limit scope of storage
00204     /* Previously all this was on the stack, but this is 1.2MB for 320x240 images
00205      * That's already a problem for OS X (default 512KB thread stack size),
00206      * could be a problem elsewhere for bigger images... so store on heap */
00207     vector<float> storage(width*height*4);  // do all the memory in one big block, exception safe
00208     float * tmin = &storage[width*height*0];
00209     float * tmax = &storage[width*height*1];
00210     float * mmin = &storage[width*height*2];
00211     float * mmax = &storage[width*height*3];
00212                   
00213     for (int y = 0; y+1 < height; y++) {
00214       for (int x = 0; x+1 < width; x++) {
00215                                   
00216         float mag0 = fimMag.get(x,y);
00217         if (mag0 < Edge::minMag)
00218           continue;
00219         mmax[y*width+x] = mag0;
00220         mmin[y*width+x] = mag0;
00221                                   
00222         float theta0 = fimTheta.get(x,y);
00223         tmin[y*width+x] = theta0;
00224         tmax[y*width+x] = theta0;
00225                                   
00226         // Calculates then adds edges to 'vector<Edge> edges'
00227         Edge::calcEdges(theta0, x, y, fimTheta, fimMag, edges, nEdges);
00228                                   
00229         // XXX Would 8 connectivity help for rotated tags?
00230         // Probably not much, so long as input filtering hasn't been disabled.
00231       }
00232     }
00233                   
00234     edges.resize(nEdges);
00235     std::stable_sort(edges.begin(), edges.end());
00236     Edge::mergeEdges(edges,uf,tmin,tmax,mmin,mmax);
00237   }
00238           
00239   //================================================================
00240   // Step four: Loop over the pixels again, collecting statistics for each cluster.
00241   // We will soon fit lines (segments) to these points.
00242 
00243   map<int, vector<XYWeight> > clusters;
00244   for (int y = 0; y+1 < fimSeg.getHeight(); y++) {
00245     for (int x = 0; x+1 < fimSeg.getWidth(); x++) {
00246       if (uf.getSetSize(y*fimSeg.getWidth()+x) < Segment::minimumSegmentSize)
00247         continue;
00248 
00249       int rep = (int) uf.getRepresentative(y*fimSeg.getWidth()+x);
00250      
00251       map<int, vector<XYWeight> >::iterator it = clusters.find(rep);
00252       if ( it == clusters.end() ) {
00253         clusters[rep] = vector<XYWeight>();
00254         it = clusters.find(rep);
00255       }
00256       vector<XYWeight> &points = it->second;
00257       points.push_back(XYWeight(x,y,fimMag.get(x,y)));
00258     }
00259   }
00260 
00261   //================================================================
00262   // Step five: Loop over the clusters, fitting lines (which we call Segments).
00263   std::vector<Segment> segments; //used in Step six
00264   std::map<int, std::vector<XYWeight> >::const_iterator clustersItr;
00265   for (clustersItr = clusters.begin(); clustersItr != clusters.end(); clustersItr++) {
00266     std::vector<XYWeight> points = clustersItr->second;
00267     GLineSegment2D gseg = GLineSegment2D::lsqFitXYW(points);
00268 
00269     // filter short lines
00270     float length = MathUtil::distance2D(gseg.getP0(), gseg.getP1());
00271     if (length < Segment::minimumLineLength)
00272       continue;
00273 
00274     Segment seg;
00275     float dy = gseg.getP1().second - gseg.getP0().second;
00276     float dx = gseg.getP1().first - gseg.getP0().first;
00277 
00278     float tmpTheta = std::atan2(dy,dx);
00279 
00280     seg.setTheta(tmpTheta);
00281     seg.setLength(length);
00282 
00283     // We add an extra semantic to segments: the vector
00284     // p1->p2 will have dark on the left, white on the right.
00285     // To do this, we'll look at every gradient and each one
00286     // will vote for which way they think the gradient should
00287     // go. This is way more retentive than necessary: we
00288     // could probably sample just one point!
00289 
00290     float flip = 0, noflip = 0;
00291     for (unsigned int i = 0; i < points.size(); i++) {
00292       XYWeight xyw = points[i];
00293       
00294       float theta = fimTheta.get((int) xyw.x, (int) xyw.y);
00295       float mag = fimMag.get((int) xyw.x, (int) xyw.y);
00296 
00297       // err *should* be +M_PI/2 for the correct winding, but if we
00298       // got the wrong winding, it'll be around -M_PI/2.
00299       float err = MathUtil::mod2pi(theta - seg.getTheta());
00300 
00301       if (err < 0)
00302         noflip += mag;
00303       else
00304         flip += mag;
00305     }
00306 
00307     if (flip > noflip) {
00308       float temp = seg.getTheta() + (float)M_PI;
00309       seg.setTheta(temp);
00310     }
00311 
00312     float dot = dx*std::cos(seg.getTheta()) + dy*std::sin(seg.getTheta());
00313     if (dot > 0) {
00314       seg.setX0(gseg.getP1().first); seg.setY0(gseg.getP1().second);
00315       seg.setX1(gseg.getP0().first); seg.setY1(gseg.getP0().second);
00316     }
00317     else {
00318       seg.setX0(gseg.getP0().first); seg.setY0(gseg.getP0().second);
00319       seg.setX1(gseg.getP1().first); seg.setY1(gseg.getP1().second);
00320     }
00321 
00322     segments.push_back(seg);
00323   }
00324 
00325 #ifdef DEBUG_APRIL
00326 #if 0
00327   {
00328     for (vector<Segment>::iterator it = segments.begin(); it!=segments.end(); it++) {
00329       long int r = random();
00330       cv::line(image,
00331                cv::Point2f(it->getX0(), it->getY0()),
00332                cv::Point2f(it->getX1(), it->getY1()),
00333                cv::Scalar(r%0xff,(r%0xff00)>>8,(r%0xff0000)>>16,0) );
00334     }
00335   }
00336 #endif
00337 #endif
00338 
00339   // Step six: For each segment, find segments that begin where this segment ends.
00340   // (We will chain segments together next...) The gridder accelerates the search by
00341   // building (essentially) a 2D hash table.
00342   Gridder<Segment> gridder(0,0,width,height,10);
00343   
00344   // add every segment to the hash table according to the position of the segment's
00345   // first point. Remember that the first point has a specific meaning due to our
00346   // left-hand rule above.
00347   for (unsigned int i = 0; i < segments.size(); i++) {
00348     gridder.add(segments[i].getX0(), segments[i].getY0(), &segments[i]);
00349   }
00350   
00351   // Now, find child segments that begin where each parent segment ends.
00352   for (unsigned i = 0; i < segments.size(); i++) {
00353     Segment &parentseg = segments[i];
00354       
00355     //compute length of the line segment
00356     GLine2D parentLine(std::pair<float,float>(parentseg.getX0(), parentseg.getY0()),
00357                        std::pair<float,float>(parentseg.getX1(), parentseg.getY1()));
00358 
00359     Gridder<Segment>::iterator iter = gridder.find(parentseg.getX1(), parentseg.getY1(), 0.5f*parentseg.getLength());
00360     while(iter.hasNext()) {
00361       Segment &child = iter.next();
00362       if (MathUtil::mod2pi(child.getTheta() - parentseg.getTheta()) > 0) {
00363         continue;
00364       }
00365 
00366       // compute intersection of points
00367       GLine2D childLine(std::pair<float,float>(child.getX0(), child.getY0()),
00368                         std::pair<float,float>(child.getX1(), child.getY1()));
00369 
00370       std::pair<float,float> p = parentLine.intersectionWith(childLine);
00371       if (p.first == -1) {
00372         continue;
00373       }
00374 
00375       float parentDist = MathUtil::distance2D(p, std::pair<float,float>(parentseg.getX1(),parentseg.getY1()));
00376       float childDist = MathUtil::distance2D(p, std::pair<float,float>(child.getX0(),child.getY0()));
00377 
00378       if (max(parentDist,childDist) > parentseg.getLength()) {
00379         // cout << "intersection too far" << endl;
00380         continue;
00381       }
00382 
00383       // everything's OK, this child is a reasonable successor.
00384       parentseg.children.push_back(&child);
00385     }
00386   }
00387 
00388   //================================================================
00389   // Step seven: Search all connected segments to see if any form a loop of length 4.
00390   // Add those to the quads list.
00391   vector<Quad> quads;
00392   
00393   vector<Segment*> tmp(5);
00394   for (unsigned int i = 0; i < segments.size(); i++) {
00395     tmp[0] = &segments[i];
00396     Quad::search(fimOrig, tmp, segments[i], 0, quads, opticalCenter);
00397   }
00398 
00399 #ifdef DEBUG_APRIL
00400   {
00401     for (unsigned int qi = 0; qi < quads.size(); qi++ ) {
00402       Quad &quad = quads[qi];
00403       std::pair<float, float> p1 = quad.quadPoints[0];
00404       std::pair<float, float> p2 = quad.quadPoints[1];
00405       std::pair<float, float> p3 = quad.quadPoints[2];
00406       std::pair<float, float> p4 = quad.quadPoints[3];
00407       cv::line(image, cv::Point2f(p1.first, p1.second), cv::Point2f(p2.first, p2.second), cv::Scalar(0,0,255,0) );
00408       cv::line(image, cv::Point2f(p2.first, p2.second), cv::Point2f(p3.first, p3.second), cv::Scalar(0,0,255,0) );
00409       cv::line(image, cv::Point2f(p3.first, p3.second), cv::Point2f(p4.first, p4.second), cv::Scalar(0,0,255,0) );
00410       cv::line(image, cv::Point2f(p4.first, p4.second), cv::Point2f(p1.first, p1.second), cv::Scalar(0,0,255,0) );
00411 
00412       p1 = quad.interpolate(-1,-1);
00413       p2 = quad.interpolate(-1,1);
00414       p3 = quad.interpolate(1,1);
00415       p4 = quad.interpolate(1,-1);
00416       cv::circle(image, cv::Point2f(p1.first, p1.second), 3, cv::Scalar(0,255,0,0), 1);
00417       cv::circle(image, cv::Point2f(p2.first, p2.second), 3, cv::Scalar(0,255,0,0), 1);
00418       cv::circle(image, cv::Point2f(p3.first, p3.second), 3, cv::Scalar(0,255,0,0), 1);
00419       cv::circle(image, cv::Point2f(p4.first, p4.second), 3, cv::Scalar(0,255,0,0), 1);
00420     }
00421     cv::imshow("debug_april", image);
00422   }
00423 #endif
00424 
00425   //================================================================
00426   // Step eight. Decode the quads. For each quad, we first estimate a
00427   // threshold color to decide between 0 and 1. Then, we read off the
00428   // bits and see if they make sense.
00429 
00430   std::vector<TagDetection> detections;
00431 
00432   for (unsigned int qi = 0; qi < quads.size(); qi++ ) {
00433     Quad &quad = quads[qi];
00434 
00435     // Find a threshold
00436     GrayModel blackModel, whiteModel;
00437     const int dd = 2 * thisTagFamily.blackBorder + thisTagFamily.dimension;
00438 
00439     for (int iy = -1; iy <= dd; iy++) {
00440       float y = (iy + 0.5f) / dd;
00441       for (int ix = -1; ix <= dd; ix++) {
00442         float x = (ix + 0.5f) / dd;
00443         std::pair<float,float> pxy = quad.interpolate01(x, y);
00444         int irx = (int) (pxy.first + 0.5);
00445         int iry = (int) (pxy.second + 0.5);
00446         if (irx < 0 || irx >= width || iry < 0 || iry >= height)
00447           continue;
00448         float v = fim.get(irx, iry);
00449         if (iy == -1 || iy == dd || ix == -1 || ix == dd)
00450           whiteModel.addObservation(x, y, v);
00451         else if (iy == 0 || iy == (dd-1) || ix == 0 || ix == (dd-1))
00452           blackModel.addObservation(x, y, v);
00453       }
00454     }
00455 
00456     bool bad = false;
00457     unsigned long long tagCode = 0;
00458     for ( int iy = thisTagFamily.dimension-1; iy >= 0; iy-- ) {
00459       float y = (thisTagFamily.blackBorder + iy + 0.5f) / dd;
00460       for (int ix = 0; ix < thisTagFamily.dimension; ix++ ) {
00461         float x = (thisTagFamily.blackBorder + ix + 0.5f) / dd;
00462         std::pair<float,float> pxy = quad.interpolate01(x, y);
00463         int irx = (int) (pxy.first + 0.5);
00464         int iry = (int) (pxy.second + 0.5);
00465         if (irx < 0 || irx >= width || iry < 0 || iry >= height) {
00466           // cout << "*** bad:  irx=" << irx << "  iry=" << iry << endl;
00467           bad = true;
00468           continue;
00469         }
00470         float threshold = (blackModel.interpolate(x,y) + whiteModel.interpolate(x,y)) * 0.5f;
00471         float v = fim.get(irx, iry);
00472         tagCode = tagCode << 1;
00473         if ( v > threshold)
00474           tagCode |= 1;
00475 #ifdef DEBUG_APRIL
00476         {
00477           if (v>threshold)
00478             cv::circle(image, cv::Point2f(irx, iry), 1, cv::Scalar(0,0,255,0), 2);
00479           else
00480             cv::circle(image, cv::Point2f(irx, iry), 1, cv::Scalar(0,255,0,0), 2);
00481         }
00482 #endif
00483       }
00484     }
00485 
00486     if ( !bad ) {
00487       TagDetection thisTagDetection;
00488       thisTagFamily.decode(thisTagDetection, tagCode);
00489 
00490       // compute the homography (and rotate it appropriately)
00491       thisTagDetection.homography = quad.homography.getH();
00492       thisTagDetection.hxy = quad.homography.getCXY();
00493 
00494       float c = std::cos(thisTagDetection.rotation*(float)M_PI/2);
00495       float s = std::sin(thisTagDetection.rotation*(float)M_PI/2);
00496       Eigen::Matrix3d R;
00497       R.setZero();
00498       R(0,0) = R(1,1) = c;
00499       R(0,1) = -s;
00500       R(1,0) = s;
00501       R(2,2) = 1;
00502       Eigen::Matrix3d tmp;
00503       tmp = thisTagDetection.homography * R;
00504       thisTagDetection.homography = tmp;
00505 
00506       // Rotate points in detection according to decoded
00507       // orientation.  Thus the order of the points in the
00508       // detection object can be used to determine the
00509       // orientation of the target.
00510       std::pair<float,float> bottomLeft = thisTagDetection.interpolate(-1,-1);
00511       int bestRot = -1;
00512       float bestDist = FLT_MAX;
00513       for ( int i=0; i<4; i++ ) {
00514         float const dist = AprilTags::MathUtil::distance2D(bottomLeft, quad.quadPoints[i]);
00515         if ( dist < bestDist ) {
00516           bestDist = dist;
00517           bestRot = i;
00518         }
00519       }
00520 
00521       for (int i=0; i< 4; i++)
00522         thisTagDetection.p[i] = quad.quadPoints[(i+bestRot) % 4];
00523 
00524       if (thisTagDetection.good) {
00525         thisTagDetection.cxy = quad.interpolate01(0.5f, 0.5f);
00526         thisTagDetection.observedPerimeter = quad.observedPerimeter;
00527         detections.push_back(thisTagDetection);
00528       }
00529     }
00530   }
00531 
00532 #ifdef DEBUG_APRIL
00533   {
00534     cv::imshow("debug_april", image);
00535   }
00536 #endif
00537 
00538   //================================================================
00539   //Step nine: Some quads may be detected more than once, due to
00540   //partial occlusion and our aggressive attempts to recover from
00541   //broken lines. When two quads (with the same id) overlap, we will
00542   //keep the one with the lowest error, and if the error is the same,
00543   //the one with the greatest observed perimeter.
00544 
00545   std::vector<TagDetection> goodDetections;
00546 
00547   // NOTE: allow multiple non-overlapping detections of the same target.
00548 
00549   for ( vector<TagDetection>::const_iterator it = detections.begin();
00550         it != detections.end(); it++ ) {
00551     const TagDetection &thisTagDetection = *it;
00552 
00553     bool newFeature = true;
00554 
00555     for ( unsigned int odidx = 0; odidx < goodDetections.size(); odidx++) {
00556       TagDetection &otherTagDetection = goodDetections[odidx];
00557 
00558       if ( thisTagDetection.id != otherTagDetection.id ||
00559            ! thisTagDetection.overlapsTooMuch(otherTagDetection) )
00560         continue;
00561 
00562       // There's a conflict.  We must pick one to keep.
00563       newFeature = false;
00564 
00565       // This detection is worse than the previous one... just don't use it.
00566       if ( thisTagDetection.hammingDistance > otherTagDetection.hammingDistance )
00567         continue;
00568 
00569       // Otherwise, keep the new one if it either has strictly *lower* error, or greater perimeter.
00570       if ( thisTagDetection.hammingDistance < otherTagDetection.hammingDistance ||
00571            thisTagDetection.observedPerimeter > otherTagDetection.observedPerimeter )
00572         goodDetections[odidx] = thisTagDetection;
00573     }
00574 
00575      if ( newFeature )
00576        goodDetections.push_back(thisTagDetection);
00577 
00578   }
00579 
00580   //cout << "AprilTags: edges=" << nEdges << " clusters=" << clusters.size() << " segments=" << segments.size()
00581   //     << " quads=" << quads.size() << " detections=" << detections.size() << " unique tags=" << goodDetections.size() << endl;
00582 
00583   return goodDetections;
00584 }
00585 
00586 } // namespace