sort_las.cpp
Go to the documentation of this file.
00001 #include <megatree/common.h>
00002 
00003 #include <unistd.h>
00004 #include <iostream>
00005 #include <argp.h>
00006 #include <float.h>
00007 
00008 #include <liblas/liblas.hpp>
00009 #include <fstream>
00010 #include <iostream>
00011 
00012 #include <boost/filesystem.hpp>
00013 
00014 using namespace megatree;
00015 
00016 class TempDirectory
00017 {
00018 public:
00019   TempDirectory(const std::string &prefix = "", bool remove = true)
00020     : remove_(remove)
00021   {
00022     std::string tmp_storage = prefix + "XXXXXX";
00023     char *tmp = mkdtemp(&tmp_storage[0]);
00024     assert(tmp);
00025     printf("Temporary directory: %s\n", tmp);
00026 
00027     path_ = tmp;
00028   }
00029 
00030   ~TempDirectory()
00031   {
00032     if (remove_)
00033       boost::filesystem::remove_all(path_);
00034   }
00035 
00036   const boost::filesystem::path &getPath() const
00037   {
00038     return path_;
00039   }
00040 
00041 private:
00042   boost::filesystem::path path_;
00043   bool remove_;
00044 };
00045 
00046 
00047 // Returns, in lo and hi, the minimum and maximum bounds of the points in las_filenames.
00048 void getExtents(const std::vector<std::string> &las_filenames, double lo[3], double hi[3])
00049 {
00050   lo[0] = lo[1] = lo[2] = DBL_MAX;
00051   hi[0] = hi[1] = hi[2] = DBL_MIN;
00052   
00053   for (size_t i = 0; i < las_filenames.size(); ++i)
00054   {
00055     std::ifstream fin;
00056     fin.open(las_filenames[i].c_str(), std::ios::in | std::ios::binary);
00057 
00058     liblas::ReaderFactory f;
00059     liblas::Reader reader = f.CreateWithStream(fin);
00060 
00061     liblas::Header const& header = reader.GetHeader();
00062     lo[0] = std::min(lo[0], header.GetMinX());
00063     lo[1] = std::min(lo[1], header.GetMinY());
00064     lo[2] = std::min(lo[2], header.GetMinZ());
00065     hi[0] = std::max(hi[0], header.GetMaxX());
00066     hi[1] = std::max(hi[1], header.GetMaxY());
00067     hi[2] = std::max(hi[2], header.GetMaxZ());
00068   }
00069 }
00070 
00071 // http://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
00072 
00073 // "Insert" a 0 bit after each of the 16 low bits of x
00074 uint32_t Part1By1(uint32_t x)
00075 {
00076   x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
00077   x = (x ^ (x <<  8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
00078   x = (x ^ (x <<  4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
00079   x = (x ^ (x <<  2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
00080   x = (x ^ (x <<  1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
00081   return x;
00082 }
00083 
00084 // "Insert" two 0 bits after each of the 10 low bits of x
00085 uint32_t Part1By2(uint32_t x)
00086 {
00087   x &= 0x000003ff;                  // x = ---- ---- ---- ---- ---- --98 7654 3210
00088   x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
00089   x = (x ^ (x <<  8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
00090   x = (x ^ (x <<  4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
00091   x = (x ^ (x <<  2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
00092   return x;
00093 }
00094 
00095 uint32_t EncodeMorton2(uint32_t x, uint32_t y)
00096 {
00097   return (Part1By1(y) << 1) + Part1By1(x);
00098 }
00099 
00100 uint32_t EncodeMorton3(uint32_t x, uint32_t y, uint32_t z)
00101 {
00102   return (Part1By2(z) << 2) + (Part1By2(y) << 1) + Part1By2(x);
00103 }
00104 
00105 struct MortonPoint
00106 {
00107   uint32_t morton;
00108   double x, y, z;
00109   double r, g, b;
00110 
00111   bool operator<(const MortonPoint &mp) const
00112   {
00113     return morton < mp.morton;
00114   }
00115 };
00116 
00117 std::string chunkFilename(unsigned int level, unsigned int num)
00118 {
00119   char buf[255];
00120   snprintf(buf, 255, "%04u_%07u", level, num);
00121   return std::string(buf);
00122 }
00123 
00124 void writePoints(const std::vector<MortonPoint> &points, const boost::filesystem::path &path)
00125 {
00126   FILE* f = fopen(path.string().c_str(), "wb");
00127   assert(f);
00128   for (size_t i = 0; i < points.size(); ++i)
00129   {
00130     fwrite(&points[i], sizeof(MortonPoint), 1, f);
00131   }
00132   fclose(f);
00133 }
00134 
00135 void mergeChunks(const boost::filesystem::path& inpath1,
00136                  const boost::filesystem::path& inpath2,
00137                  const boost::filesystem::path& outpath)
00138 {
00139   printf("Merging %s and %s into ==> %s\n", inpath1.string().c_str(), inpath2.string().c_str(), outpath.string().c_str());
00140   FILE* fin1 = fopen(inpath1.string().c_str(), "rb");
00141   FILE* fin2 = fopen(inpath2.string().c_str(), "rb");
00142   FILE* fout = fopen(outpath.string().c_str(), "wb");
00143 
00144   assert(fin1);  assert(fin2);  assert(fout);
00145 
00146   MortonPoint p1, p2;
00147   if (fread(&p1, sizeof(MortonPoint), 1, fin1) < 1 ||
00148       fread(&p2, sizeof(MortonPoint), 1, fin2) < 1)
00149   {
00150     fprintf(stderr, "ERROR!  One of the files was empty!\n");
00151     abort();
00152   }
00153 
00154   bool file1_finished;
00155   while (true)
00156   {
00157     if (p1 < p2) {
00158       fwrite(&p1, sizeof(MortonPoint), 1, fout);
00159       size_t ret = fread(&p1, sizeof(MortonPoint), 1, fin1);
00160       if (ret < 1) {
00161         assert(feof(fin1));
00162         file1_finished = true;
00163         break;
00164       }
00165     }
00166     else {
00167       fwrite(&p2, sizeof(MortonPoint), 1, fout);
00168       size_t ret = fread(&p2, sizeof(MortonPoint), 1, fin2);
00169       if (ret < 1) {
00170         assert(feof(fin2));
00171         file1_finished = false;
00172         break;
00173       }
00174     }
00175   }
00176 
00177   // Copies over the rest of the data from whichever file didn't finish.
00178   if (file1_finished) {
00179     // File 2 didn't finish.
00180     while (true) {
00181       fwrite(&p2, sizeof(MortonPoint), 1, fout);
00182       size_t ret = fread(&p2, sizeof(MortonPoint), 1, fin2);
00183       if (ret < 1) {
00184         assert(feof(fin2));
00185         break;
00186       }
00187     }
00188   }
00189   else {
00190     // File 1 didn't finish.
00191     while (true) {
00192       fwrite(&p1, sizeof(MortonPoint), 1, fout);
00193       size_t ret = fread(&p1, sizeof(MortonPoint), 1, fin1);
00194       if (ret < 1) {
00195         assert(feof(fin1));
00196         break;
00197       }
00198     }
00199   }
00200 
00201   fclose(fin1);
00202   fclose(fin2);
00203   fclose(fout);
00204 }
00205 
00206 
00207 struct arguments_t {
00208   int chunk_size;
00209   char* tree;
00210   std::vector<std::string> las_filenames;
00211 };
00212 
00213 static int parse_opt(int key, char *arg, struct argp_state *state)
00214 {
00215   struct arguments_t *arguments = (arguments_t*)state->input;
00216   
00217   switch (key)
00218   {
00219   case 'c':
00220     arguments->chunk_size = parseNumberSuffixed(arg);
00221     break;
00222   case 't':
00223     arguments->tree = arg;
00224     break;
00225   case ARGP_KEY_ARG:
00226     arguments->las_filenames.push_back(arg);
00227     break;
00228   }
00229   return 0;
00230 }
00231 
00232 
00233 int main (int argc, char** argv)
00234 {
00235   // Default command line arguments
00236   struct arguments_t arguments;
00237   arguments.chunk_size = 10000000;
00238   arguments.tree = 0;
00239   
00240   // Parses command line options
00241   struct argp_option options[] = {
00242     {"chunk-size", 'c', "SIZE",   0,     "Chunk size (for initial sorts)"},
00243     { 0 }
00244   };
00245   struct argp argp = { options, parse_opt };
00246   int parse_ok = argp_parse(&argp, argc, argv, 0, 0, &arguments);
00247   printf("Arguments parsed: %d\n", parse_ok);
00248   printf("Chunk size: %d\n", arguments.chunk_size);
00249   assert(parse_ok == 0);
00250 
00251   if (arguments.las_filenames.size() == 0) {
00252     fprintf(stderr, "No files given.\n");
00253     exit(1);
00254   }
00255 
00256   double lo[3], hi[3];
00257   getExtents(arguments.las_filenames, lo, hi);
00258   printf("Points extend from (%lf, %lf, %lf) to (%lf, %lf, %lf)\n", lo[0], lo[1], lo[2], hi[0], hi[1], hi[2]);
00259 
00260   liblas::Header saved_header;
00261 
00262   TempDirectory scratch_dir("sorting", false);
00263 
00264   // ------------------------------------------------------------
00265   // Reads the las files into a set of sorted chunks of points.
00266   
00267   Tictoc timer;
00268   unsigned int chunk_number = 0;
00269   unsigned int point_index = 0;
00270   std::vector<MortonPoint> morton_points(arguments.chunk_size);
00271   for (size_t i = 0; i < arguments.las_filenames.size(); ++i)
00272   {
00273     // Opens this las file.
00274     printf("Loading las file: %s\n", arguments.las_filenames[i].c_str());
00275     std::ifstream fin;
00276     fin.open(arguments.las_filenames[i].c_str(), std::ios::in | std::ios::binary);
00277 
00278     liblas::ReaderFactory f;
00279     liblas::Reader reader = f.CreateWithStream(fin);
00280 
00281     liblas::Header const& header = reader.GetHeader();
00282     if (i == 0)
00283       saved_header = header;
00284 
00285     // Determines if the file contains color information, or just intensity.
00286     // TODO: I haven't seen a LAS file with color yet, so I don't know what fields to look for.
00287     bool has_color = false;  // !!header.GetSchema().GetDimension("R");
00288 
00289     // Adds each point into the tree
00290     std::vector<double> point(3, 0.0);
00291     std::vector<double> color(3, 0.0);
00292     while (reader.ReadNextPoint())
00293     {
00294       const liblas::Point& p = reader.GetPoint();
00295       point[0] = p.GetX();
00296       point[1] = p.GetY();
00297       point[2] = p.GetZ();
00298       if (has_color) {
00299         color[0] = p.GetColor().GetRed();
00300         color[1] = p.GetColor().GetGreen();
00301         color[2] = p.GetColor().GetBlue();
00302       }
00303       else {
00304         color[0] = color[1] = color[2] = double(p.GetIntensity()) / 65536.0 * 255.0; // TODO: is it [0,256) or [0,1]???
00305       }
00306 
00307       // TODO: uint32_t only gives 10 bits per coord.  Consider moving up to uint64 for fixed point coordinates
00308 
00309       // Converts the point to fixed-point coordinates.
00310       uint32_t fixed_coords[3];
00311       fixed_coords[0] = (uint32_t)( (point[0] - lo[0]) / (hi[0] - lo[0]) * (1<<10));
00312       fixed_coords[1] = (uint32_t)( (point[1] - lo[1]) / (hi[1] - lo[1]) * (1<<10));
00313       fixed_coords[2] = (uint32_t)( (point[2] - lo[2]) / (hi[2] - lo[2]) * (1<<10));
00314 
00315       // Computes the Morton code of the point.
00316       uint32_t morton = EncodeMorton3(fixed_coords[0], fixed_coords[1], fixed_coords[2]);
00317       //printf("(%lf, %lf, %lf) --> 0x%x\n", point[0]-lo[0], point[1]-lo[1], point[2]-lo[2], morton);
00318 
00319       morton_points[point_index].morton = morton;
00320       morton_points[point_index].x = point[0];
00321       morton_points[point_index].y = point[1];
00322       morton_points[point_index].z = point[2];
00323       morton_points[point_index].r = color[0];
00324       morton_points[point_index].g = color[1];
00325       morton_points[point_index].b = color[2];
00326 
00327       if (++point_index == (unsigned int)arguments.chunk_size)
00328       {
00329         // We've read in one chunk's worth of points.
00330 
00331         // Sorts the points.
00332         std::sort(morton_points.begin(), morton_points.end());
00333 
00334         // Writes out this chunk of points.
00335         writePoints(morton_points, scratch_dir.getPath() / chunkFilename(0, chunk_number));
00336         printf("Wrote chunk %6u\n", chunk_number);
00337 
00338         ++chunk_number;
00339         point_index = 0;
00340       }
00341     }
00342   }
00343 
00344   printf("All level 0 chunks written (%u chunks).  Moving onto the merging.\n", chunk_number);
00345 
00346   unsigned int merging_level = 0;
00347   unsigned int num_chunks = chunk_number;
00348   unsigned int num_chunks_next = 0;
00349   while (true)
00350   {
00351     for (unsigned int i = 0; i + 1 < num_chunks; i += 2)
00352     {
00353       printf("Level %u, combining %u and %u.\n", merging_level, i, i+1);
00354       mergeChunks(scratch_dir.getPath() / chunkFilename(merging_level, i),
00355                   scratch_dir.getPath() / chunkFilename(merging_level, i + 1),
00356                   scratch_dir.getPath() / chunkFilename(merging_level + 1, i / 2));
00357       // TODO: remove the old files.
00358       ++num_chunks_next;
00359     }
00360 
00361     // Is there an extra chunk with no partner?
00362     if (num_chunks % 2 == 1) {
00363       // Just copies up the extra chunk.
00364       printf("Copying chunk %u from level %u\n", num_chunks - 1, merging_level);
00365       boost::filesystem::copy_file(scratch_dir.getPath() / chunkFilename(merging_level, num_chunks - 1),
00366                                    scratch_dir.getPath() / chunkFilename(merging_level + 1, num_chunks / 2));
00367       ++num_chunks_next;
00368     }
00369 
00370     // Finished when we hit a single chunk.
00371     if (num_chunks_next == 1)
00372       break;
00373     
00374     num_chunks = num_chunks_next;
00375     num_chunks_next = 0;
00376     ++merging_level;
00377   }
00378 
00379   float t = timer.toc();
00380   printf("Sorting all files took %.3f seconds\n", t);
00381 
00382   boost::filesystem::path result_path = scratch_dir.getPath() / chunkFilename(merging_level + 1, 0);
00383 
00384   // Prepares a LAS file.
00385   std::ofstream ofs;
00386   ofs.open("sorted.las", std::ios::out | std::ios::binary);
00387   liblas::Header header;
00388   header.SetScale(saved_header.GetScaleX(), saved_header.GetScaleY(), saved_header.GetScaleZ());
00389   liblas::SpatialReference srs = saved_header.GetSRS();
00390   header.SetSRS(srs);
00391   header.SetSchema(saved_header.GetSchema());
00392   // TODO: may need to copy over more stuff from the header
00393   liblas::Writer writer(ofs, header);
00394 
00395   // Writes the result into a LAS file.
00396   FILE* fresult = fopen(result_path.string().c_str(), "rb");
00397   assert(fresult);
00398   while (true)
00399   {
00400     MortonPoint p;
00401     size_t ret = fread(&p, sizeof(MortonPoint), 1, fresult);
00402     if (ret < 1) {
00403       assert(feof(fresult));
00404       break;
00405     }
00406 
00407     liblas::Point point;
00408     point.SetCoordinates(p.x, p.y, p.z);
00409     writer.WritePoint(point);
00410   }
00411   fclose(fresult);
00412 
00413   t = timer.toc();
00414   printf("Finished after %.3f seconds (%.1f min).\n", t, t/60.0);
00415   return 0;
00416 }


megatree_import
Author(s): Wim Meeussen
autogenerated on Mon Dec 2 2013 13:01:33