libfovis: main.cpp Source File

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <getopt.h>
00005 #include <sys/stat.h>
00006 #include <sys/types.h>
00007 
00008 #include <string>
00009 #include <vector>
00010 
00011 #include <fovis/fovis.hpp>
00012 
00013 #include "lodepng.h"
00014 #include "draw.hpp"
00015 
00016 //#define dbg(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } while(0)
00017 #define dbg(...)
00018 #define info(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } while(0)
00019 #define err(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } while(0)
00020 
00021 struct TimestampFilename
00022 {
00023   TimestampFilename(int64_t ts, const std::string& fname) :
00024     timestamp(ts), filename(fname)
00025   {}
00026   int64_t timestamp;
00027   std::string filename;
00028 };
00029 
00030 static std::vector<TimestampFilename> read_file_index(const std::string& indexFile)
00031 {
00032   FILE* fp = fopen(indexFile.c_str(), "r");
00033   std::vector<TimestampFilename> result;
00034   if(!fp) {
00035     perror("fopen");
00036     return result;
00037   }
00038   char linebuf[1024];
00039   int linenum = 0;
00040   while(!feof(fp)) {
00041     if(!fgets(linebuf, sizeof(linebuf), fp))
00042       break;
00043     linenum++;
00044     if(strlen(linebuf) == 0)
00045       break;
00046     if(linebuf[0] == '#')
00047       continue;
00048     long seconds;
00049     long microseconds;
00050     char png_fname[1024];
00051     if(3 != sscanf(linebuf, "%ld.%ld %s", &seconds, &microseconds, png_fname)) {
00052       err("%s:%d Parse error", indexFile.c_str(), linenum);
00053       fclose(fp);
00054       return result;
00055     }
00056     int64_t timestamp = seconds * 1000000 + microseconds;
00057     result.push_back(TimestampFilename(timestamp, png_fname));
00058   }
00059   return result;
00060 }
00061 
00062 static void write_pgm(const std::string& fname, const uint8_t* pixels,
00063     int width, int height, int stride)
00064 {
00065   FILE* fp = fopen(fname.c_str(), "wb");
00066   if(!fp) {
00067     perror("fopen");
00068     return;
00069   }
00070   fprintf(fp, "P5\n%d\n%d\n%d\n", width, height, 255);
00071   int row;
00072   for (row=0; row<height; row++) {
00073     if (1 != fwrite(pixels + row*stride, width, 1, fp)) {
00074       fclose(fp);
00075       return;
00076     }
00077   }
00078   fclose(fp);
00079   return;
00080 }
00081 
00082 static void usage()
00083 {
00084   fprintf(stderr, "usage: fv-tum-rgbd [options] <dataset_dir> <output_dir>\n"
00085       "\n"
00086       "This program demonstrates using FOVIS with the TUM-RGBD dataset\n"
00087       "\n"
00088       "   http://vision.in.tum.de/data/datasets/rgbd-dataset\n"
00089       "\n"
00090       "OPTIONS:\n"
00091       "\n"
00092       "  --help, -h       Shows this help text\n"
00093       "  --camera ID      Uses the calibration parameters for camera ID.  Must be\n"
00094       "                   one of 'fr1', 'fr2', or 'fr3'.  This options is required\n"
00095       "  --visualize, -v  Visualizes the motion estimation results by drawing\n"
00096       "                   reference and target images, features, and feature\n"
00097       "                   matches.  The visualized results are saved to individual\n"
00098       "                   PNG files in <output_dir>/vis\n"
00099       "\n"
00100       );
00101   exit(1);
00102 }
00103 
00104 int main(int argc, char **argv)
00105 {
00106   const char *optstring = "hc:v";
00107   int c;
00108   struct option long_opts[] = {
00109     { "help", no_argument, 0, 'h' },
00110     { "camera", required_argument, 0, 'c' },
00111     { "visualize", no_argument, 0, 'v' },
00112     { 0, 0, 0, 0 }
00113   };
00114 
00115   std::string camera_id;
00116   bool visualize = false;
00117 
00118   while ((c = getopt_long (argc, argv, optstring, long_opts, 0)) >= 0) {
00119     switch (c) {
00120       case 'c':
00121         camera_id = optarg;
00122         break;
00123       case 'v':
00124         visualize = true;
00125         break;
00126       default:
00127         usage();
00128         return 1;
00129     };
00130   }
00131 
00132   if(optind >= argc - 1) {
00133     usage();
00134   }
00135 
00136   std::string datadir = argv[optind];
00137   std::string outdir = argv[optind+1];
00138 
00139   if(camera_id.empty()) {
00140     err("Missing --camera parameter.  Run with -h for options.");
00141     return 1;
00142   }
00143 
00144   fovis::CameraIntrinsicsParameters camParams;
00145   camParams.width = 640;
00146   camParams.height = 480;
00147 
00148   // RGB camera parameters
00149   if(camera_id == "fr1") {
00150     camParams.fx = 517.306408;
00151     camParams.fy = 516.469215;
00152     camParams.cx = 318.643040;
00153     camParams.cy = 255.313989;
00154     camParams.k1 = 0.262383;
00155     camParams.k2 = -0.953104;
00156     camParams.p1 = -0.005358;
00157     camParams.p2 = 0.002628;
00158     camParams.k3 = 1.163314;
00159   } else if(camera_id == "fr2") {
00160     camParams.fx = 520.908620;
00161     camParams.fy = 521.007327;
00162     camParams.cx = 325.141442;
00163     camParams.cy = 249.701764;
00164     camParams.k1 = 0.231222;
00165     camParams.k2 = -0.784899;
00166     camParams.p1 = -0.003257;
00167     camParams.p2 = -0.000105;
00168     camParams.k3 =  0.917205;
00169   } else if(camera_id == "fr3") {
00170     camParams.fx = 537.960322;
00171     camParams.fy = 539.597659;
00172     camParams.cx = 319.183641;
00173     camParams.cy = 247.053820;
00174     camParams.k1 = 0.026370;
00175     camParams.k2 = -0.100086;
00176     camParams.p1 = 0.003138;
00177     camParams.p2 = 0.002421;
00178     camParams.k3 = 0.000000;
00179   } else {
00180     err("Unknown camera id [%s]", camera_id.c_str());
00181     return 1;
00182   }
00183 
00184   info("Loading data from: [%s]\n", datadir.c_str());
00185 
00186   fovis::Rectification rect(camParams);
00187 
00188   // If we wanted to play around with the different VO parameters, we could set
00189   // them here in the "options" variable.
00190   fovis::VisualOdometryOptions options =
00191       fovis::VisualOdometry::getDefaultOptions();
00192 
00193   // setup the visual odometry
00194   fovis::VisualOdometry odom(&rect, options);
00195 
00196   // create the output trajectory file
00197   std::string traj_fname = outdir + "/traj.txt";
00198   FILE* traj_fp = fopen(traj_fname.c_str(), "w");
00199   if(!traj_fp) {
00200     err("Unable to create %s - %s", traj_fname.c_str(), strerror(errno));
00201     return 1;
00202   }
00203   std::string gray_pgm_dir = outdir + "/gray";
00204   mkdir(gray_pgm_dir.c_str(), 0755);
00205   std::string depth_pgm_dir = outdir + "/depth";
00206   mkdir(depth_pgm_dir.c_str(), 0755);
00207   std::string vis_dir = outdir + "/vis";
00208   mkdir(vis_dir.c_str(), 0755);
00209 
00210   // read the RGB and depth index files
00211   std::vector<TimestampFilename> rgb_fnames =
00212     read_file_index(datadir + "/rgb.txt");
00213   std::vector<TimestampFilename> depth_fnames =
00214     read_file_index(datadir + "/depth.txt");
00215   int depth_fname_index = 0;
00216 
00217   DrawImage draw_img(camParams.width, camParams.height * 2);
00218 
00219   for(int rgb_fname_index=0;
00220       rgb_fname_index < rgb_fnames.size();
00221       rgb_fname_index++) {
00222 
00223     int64_t timestamp = rgb_fnames[rgb_fname_index].timestamp;
00224     std::string rgb_fname =
00225       datadir + "/" + rgb_fnames[rgb_fname_index].filename;
00226     long long ts_sec = timestamp / 1000000;
00227     long ts_usec = timestamp % 1000000;
00228     char ts_str[80];
00229     snprintf(ts_str, 80, "%lld.%06ld", ts_sec, ts_usec);
00230 
00231     // match the RGB image up with a depth image
00232     bool matched_depth_image = false;
00233     while(depth_fname_index < depth_fnames.size()) {
00234       int64_t depth_ts = depth_fnames[depth_fname_index].timestamp;
00235 
00236       // declare a depth image match if the depth image timestamp is within
00237       // 40ms of the RGB image.
00238       int64_t dt_usec = depth_ts - timestamp;
00239       if(dt_usec > 40000) {
00240         dbg("  stop %lld.%06ld (dt %f)",
00241             (long long)(depth_ts / 1000000),
00242             (long)(depth_ts % 1000000),
00243             dt_usec / 1000.);
00244         break;
00245       } else if(dt_usec < -40000) {
00246         dbg("  skip %lld.%06ld (dt %f)",
00247             (long long)(depth_ts / 1000000),
00248             (long)(depth_ts % 1000000),
00249             dt_usec / 1000.);
00250         depth_fname_index++;
00251       } else {
00252         matched_depth_image = true;
00253         dbg("  mtch %lld.%06ld (dt %f)",
00254             (long long)(depth_ts / 1000000),
00255             (long)(depth_ts % 1000000),
00256             dt_usec / 1000.);
00257         break;
00258       }
00259     }
00260 
00261     if(!matched_depth_image) {
00262       // didn't find a depth image with a close enough timestamp.  Skip this
00263       // RGB image.
00264       info("# skip %s", ts_str);
00265       continue;
00266     }
00267 
00268     std::string depth_fname =
00269       datadir + "/" + depth_fnames[depth_fname_index].filename;
00270     depth_fname_index++;
00271 
00272     // read RGB data
00273     std::vector<uint8_t> rgb_data;
00274     unsigned rgb_width;
00275     unsigned rgb_height;
00276     std::vector<uint8_t> rgb_png_data;
00277     lodepng::load_file(rgb_png_data, rgb_fname.c_str());
00278     if(rgb_png_data.empty()) {
00279       err("Failed to load %s", rgb_fname.c_str());
00280       continue;
00281     }
00282     if(0 != lodepng::decode(rgb_data, rgb_width, rgb_height,
00283         rgb_png_data, LCT_RGB, 8)) {
00284       err("Error decoding PNG %s", rgb_fname.c_str());
00285       continue;
00286     }
00287 
00288     // convert RGB data to grayscale
00289     std::vector<uint8_t> gray_data(rgb_width*rgb_height);
00290     const uint8_t* rgb_pixel = &rgb_data[0];
00291     for(int pixel_index=0; pixel_index<rgb_width*rgb_height; pixel_index++) {
00292       uint8_t r = rgb_pixel[0];
00293       uint8_t g = rgb_pixel[1];
00294       uint8_t b = rgb_pixel[2];
00295       gray_data[pixel_index] = (r + g + b) / 3;
00296       rgb_pixel += 3;
00297     }
00298 
00299     // write gray image out.
00300     write_pgm(outdir + "/gray/" + ts_str + "-gray.pgm",
00301         &gray_data[0], rgb_width, rgb_height, rgb_width);
00302 
00303     // read depth data
00304     std::vector<uint8_t> depth_data_u8;
00305     unsigned depth_width;
00306     unsigned depth_height;
00307     std::vector<uint8_t> depth_png_data;
00308     lodepng::load_file(depth_png_data, depth_fname.c_str());
00309     if(depth_png_data.empty()) {
00310       err("Failed to load %s", depth_fname.c_str());
00311       continue;
00312     }
00313     if(0 != lodepng::decode(depth_data_u8, depth_width, depth_height,
00314         depth_png_data, LCT_GREY, 16)) {
00315       err("Error decoding PNG %s", depth_fname.c_str());
00316       continue;
00317     }
00318 
00319     // convert depth data to a DepthImage object
00320     fovis::DepthImage depth_image(camParams, depth_width, depth_height);
00321     std::vector<float> depth_data(depth_width * depth_height);
00322     for(int i=0; i<depth_width*depth_height; i++) {
00323       // lodepng loads 16-bit PNGs into memory with big-endian ordering.
00324       // swizzle the bytes to get a uint16_t
00325       uint8_t high_byte = depth_data_u8[i*2];
00326       uint8_t low_byte = depth_data_u8[i*2+1];
00327       uint16_t data16 = (high_byte << 8) | low_byte;
00328       if(0 == data16)
00329         depth_data[i] = NAN;
00330       else
00331         depth_data[i] = data16 / float(5000);
00332     }
00333     depth_image.setDepthImage(&depth_data[0]);
00334 
00335     // debug depth image
00336     std::vector<uint8_t> depth_pgm_data(depth_width * depth_height);
00337     float depth_pgm_max_depth = 5;
00338     for(int i=0; i<depth_width*depth_height; i++) {
00339       if(isnan(depth_data[i]))
00340         depth_pgm_data[i] = 0;
00341       else {
00342         float d = depth_data[i];
00343         if(d > depth_pgm_max_depth)
00344           d = depth_pgm_max_depth;
00345         depth_pgm_data[i] = 255 * d / depth_pgm_max_depth;
00346       }
00347     }
00348     write_pgm(outdir + "/depth/" + ts_str + "-depth.pgm",
00349         &depth_pgm_data[0], depth_width, depth_height, depth_width);
00350 
00351     // process the frame
00352     odom.processFrame(&gray_data[0], &depth_image);
00353 
00354     fovis::MotionEstimateStatusCode status = odom.getMotionEstimateStatus();
00355     switch(status) {
00356       case fovis::INSUFFICIENT_INLIERS:
00357         printf("# %s - insufficient inliers\n", ts_str);
00358         break;
00359       case fovis::OPTIMIZATION_FAILURE:
00360         printf("# %s - optimization failed\n", ts_str);
00361         break;
00362       case fovis::REPROJECTION_ERROR:
00363         printf("# %s - reprojection error too high\n", ts_str);
00364         break;
00365       case fovis::NO_DATA:
00366         printf("# %s - no data\n", ts_str);
00367         break;
00368       case fovis::SUCCESS:
00369       default:
00370         break;
00371     }
00372 
00373     // get the integrated pose estimate.
00374     Eigen::Isometry3d cam_to_local = odom.getPose();
00375     Eigen::Vector3d t = cam_to_local.translation();
00376     Eigen::Quaterniond q(cam_to_local.rotation());
00377 
00378     printf("%lld.%06ld %f %f %f %f %f %f %f\n",
00379         ts_sec, ts_usec, t.x(), t.y(), t.z(), q.x(), q.y(), q.z(), q.w());
00380     fprintf(traj_fp, "%lld.%06ld %f %f %f %f %f %f %f\n",
00381         ts_sec, ts_usec, t.x(), t.y(), t.z(), q.x(), q.y(), q.z(), q.w());
00382 
00383     // visualization
00384     if(visualize) {
00385       DrawColor status_colors[] = {
00386         DrawColor(255, 255, 0),
00387         DrawColor(0, 255, 0),
00388         DrawColor(255, 0, 0),
00389         DrawColor(255, 0, 255),
00390         DrawColor(127, 127, 0)
00391       };
00392 
00393       DrawColor red(255, 0, 0);
00394       DrawColor green(0, 255, 0);
00395       DrawColor blue(0, 0, 255);
00396 
00397       memset(&draw_img.data[0], 0, draw_img.data.size());
00398 
00399       const fovis::OdometryFrame* ref_frame = odom.getReferenceFrame();
00400       const fovis::OdometryFrame* tgt_frame = odom.getTargetFrame();
00401 
00402       const fovis::PyramidLevel* ref_pyr_base = ref_frame->getLevel(0);
00403       const uint8_t* ref_img_data = ref_pyr_base->getGrayscaleImage();
00404       int ref_img_stride = ref_pyr_base->getGrayscaleImageStride();
00405 
00406       const fovis::PyramidLevel* tgt_pyr_base = tgt_frame->getLevel(0);
00407       const uint8_t* tgt_img_data = tgt_pyr_base->getGrayscaleImage();
00408       int tgt_img_stride = tgt_pyr_base->getGrayscaleImageStride();
00409 
00410       int tgt_yoff = ref_pyr_base->getHeight();
00411 
00412       // draw the reference frame on top
00413       draw_gray_img_rgb(ref_img_data, ref_pyr_base->getWidth(),
00414           ref_pyr_base->getHeight(), ref_img_stride, 0, 0, &draw_img);
00415 
00416       // draw the target frame on bottom
00417       draw_gray_img_rgb(tgt_img_data, tgt_pyr_base->getWidth(),
00418           tgt_pyr_base->getHeight(), tgt_img_stride, 0, tgt_yoff, &draw_img);
00419 
00420       const fovis::MotionEstimator* mestimator = odom.getMotionEstimator();
00421 
00422       int num_levels = ref_frame->getNumLevels();
00423       for(int level_index=0; level_index<num_levels; level_index++) {
00424         // draw reference features
00425         const fovis::PyramidLevel* ref_level = ref_frame->getLevel(level_index);
00426         int num_ref_keypoints = ref_level->getNumKeypoints();
00427         for(int ref_kp_ind=0; ref_kp_ind<num_ref_keypoints; ref_kp_ind++) {
00428           const fovis::KeypointData* ref_kp = ref_level->getKeypointData(ref_kp_ind);
00429           int ref_u = (int)round(ref_kp->base_uv.x());
00430           int ref_v = (int)round(ref_kp->base_uv.y());
00431           draw_box_rgb(ref_u-1, ref_v-1, ref_u+1, ref_v+1, blue, &draw_img);
00432         }
00433 
00434         // draw target features
00435         const fovis::PyramidLevel* tgt_level = tgt_frame->getLevel(level_index);
00436         int num_tgt_keypoints = tgt_level->getNumKeypoints();
00437         for(int tgt_kp_ind=0; tgt_kp_ind<num_tgt_keypoints; tgt_kp_ind++) {
00438           const fovis::KeypointData* tgt_kp = tgt_level->getKeypointData(tgt_kp_ind);
00439           int tgt_u = (int)round(tgt_kp->base_uv.x());
00440           int tgt_v = (int)round(tgt_kp->base_uv.y());
00441           draw_box_rgb(tgt_u-1, tgt_v-1 + tgt_yoff, tgt_u+1, tgt_v+1 + tgt_yoff,
00442               blue, &draw_img);
00443         }
00444       }
00445 
00446       const fovis::FeatureMatch* matches = mestimator->getMatches();
00447       int num_matches = mestimator->getNumMatches();
00448 
00449       // draw non-inlier matches
00450       for(int match_index=0; match_index<num_matches; match_index++) {
00451         const fovis::FeatureMatch& match = matches[match_index];
00452         if(match.inlier)
00453           continue;
00454         const fovis::KeypointData* ref_keypoint = match.ref_keypoint;
00455         const fovis::KeypointData* tgt_keypoint = match.target_keypoint;
00456 
00457         int ref_u = (int)round(ref_keypoint->base_uv.x());
00458         int ref_v = (int)round(ref_keypoint->base_uv.y());
00459 
00460         int tgt_u = (int)round(tgt_keypoint->base_uv.x());
00461         int tgt_v = (int)round(tgt_keypoint->base_uv.y());
00462 
00463         draw_line_rgb(ref_u, ref_v,
00464             tgt_u, tgt_v + tgt_yoff,
00465             red, &draw_img);
00466       }
00467 
00468       // draw inlier matches
00469       for(int match_index=0; match_index<num_matches; match_index++) {
00470         const fovis::FeatureMatch& match = matches[match_index];
00471         if(!match.inlier)
00472           continue;
00473         const fovis::KeypointData* ref_keypoint = match.ref_keypoint;
00474         const fovis::KeypointData* tgt_keypoint = match.target_keypoint;
00475 
00476         int ref_u = (int)round(ref_keypoint->base_uv.x());
00477         int ref_v = (int)round(ref_keypoint->base_uv.y());
00478 
00479         int tgt_u = (int)round(tgt_keypoint->base_uv.x());
00480         int tgt_v = (int)round(tgt_keypoint->base_uv.y());
00481 
00482         draw_line_rgb(ref_u, ref_v,
00483             tgt_u, tgt_v + tgt_yoff,
00484             green, &draw_img);
00485       }
00486 
00487       // draw a couple lines indicating the VO status
00488       draw_box_rgb(0, tgt_yoff - 1, draw_img.width,
00489           tgt_yoff + 1, status_colors[status], &draw_img);
00490 
00491       // save visualization
00492       std::string vis_fname = vis_dir + "/" + ts_str + "-vis.png";
00493       lodepng::encode(vis_fname, &draw_img.data[0], draw_img.width,
00494           draw_img.height, LCT_RGB);
00495     }
00496   }
00497 
00498   fclose(traj_fp);
00499 
00500   return 0;
00501 }