analyze.cpp
Go to the documentation of this file.
00001 #include <argp.h>
00002 #include <unistd.h>
00003 #include <iostream>
00004 #include <vector>
00005 
00006 #include <megatree/common.h>
00007 #include <megatree/megatree.h>
00008 #include <megatree/storage_factory.h>
00009 #include <megatree/tree_functions.h>
00010 
00011 using namespace megatree;
00012 
00013 
00014 struct Analysis
00015 {
00016   Analysis()
00017     : num_nodes(0), num_leaves(0), max_depth(0)
00018   {
00019     memset(child_count_distr, 0, 9*sizeof(unsigned long));
00020     count_distr.resize(22);
00021     tendril_length_distr.resize(12);
00022   }
00023 
00024   unsigned long num_nodes;
00025   unsigned long num_leaves;
00026   unsigned int max_depth;
00027   unsigned long child_count_distr[9];  // Number of nodes with n children
00028   std::vector<unsigned long> leaf_depth_distr;  // Number of leaves with depth n
00029   std::vector<unsigned long> count_distr;
00030   std::vector<unsigned long> tendril_length_distr;
00031 
00032   // State variables for the current run.
00033   unsigned long tendril_length;
00034 
00035   void dump()
00036   {
00037     printf("analysis:\n");
00038     printf("  num_nodes:    %16lu\n", num_nodes);
00039     printf("  num_leaves:   %16lu\n", num_leaves);
00040     printf("  max_depth:    %16u\n", max_depth);
00041 
00042     printf("  child_count_distr: [");
00043     for (size_t i = 0; i < 8; ++i)
00044       printf(" %10lu,", child_count_distr[i]);
00045     printf(" %10lu ]\n", child_count_distr[8]);
00046 
00047     printf("  count_distr: [");
00048     for (size_t i = 0; i < count_distr.size() - 1; ++i)
00049       printf(" %6lu,", count_distr[i]);
00050     printf(" %6lu ]\n", count_distr.back());
00051 
00052     printf("  tendril_length_distr: [");
00053     for (size_t i = 0; i < tendril_length_distr.size() - 1; ++i)
00054       printf(" %6lu,", tendril_length_distr[i]);
00055     printf(" %6lu ]\n", tendril_length_distr.back());
00056 
00057     printf("\n\n");
00058     printf("Tendril nodes:   %10lu\n", count_distr[1] - num_leaves);
00059     printf("Stringy nodes:   %10lu\n", child_count_distr[1]);
00060     printf("\n\n");
00061     
00062   }
00063 };
00064 
00065 void analyze(MegaTree &tree, NodeHandle& node, Analysis& a, unsigned int depth=0)
00066 {
00067   if (a.num_nodes % 100000L == 0) {
00068     printf("At %5.1lfM: %s\n", double(a.num_nodes) / double(1000000), tree.toString().c_str());
00069     tree.resetCount();
00070   }
00071   a.num_nodes++;
00072   if (node.isLeaf())
00073     a.num_leaves++;
00074   a.max_depth = std::max(a.max_depth, depth);
00075   
00076   // Updates child count distribution
00077   int num_children = 0;
00078   for (size_t i = 0; i < 8; ++i) {
00079     if (node.hasChild(i))
00080       ++num_children;
00081   }
00082   a.child_count_distr[num_children]++;
00083 
00084   // Updates the leaf depth distribution.
00085   if (node.isLeaf()) {
00086     if (a.leaf_depth_distr.size() < depth + 1)
00087       a.leaf_depth_distr.resize(depth + 1);
00088     a.leaf_depth_distr[depth]++;
00089   }
00090 
00091   // Updates the node count distribution.
00092   if (node.getCount() >= a.count_distr.size() - 1)
00093     ++a.count_distr.back();
00094   else
00095     ++a.count_distr[node.getCount()];
00096 
00097   // Updates tendril statistics.
00098   if (node.isLeaf() && a.tendril_length > 0)
00099   {
00100     if (a.tendril_length >= a.tendril_length_distr.size())
00101       ++a.tendril_length_distr.back();
00102     else
00103       ++a.tendril_length_distr[a.tendril_length];
00104   }
00105   else if (node.getCount() == 1)
00106   {
00107     printf("node with count 1 that is not leaf\n");
00108     ++a.tendril_length;
00109   }
00110   else
00111     a.tendril_length = 0;
00112 
00113   // Descend
00114   for (MegaTree::ChildIterator it(tree, node); !it.finished(); it.next())
00115   {
00116     analyze(tree, it.getChildNode(), a, depth + 1);
00117   }
00118 }
00119 
00120 struct arguments_t {
00121   int cache_size;
00122   char* tree;
00123 };
00124 
00125 static int parse_opt(int key, char *arg, struct argp_state *state)
00126 {
00127   struct arguments_t *arguments = (arguments_t*)state->input;
00128   
00129   switch (key)
00130   {
00131   case 'c':
00132     arguments->cache_size = parseNumberSuffixed(arg);
00133     break;
00134   case 't':
00135     arguments->tree = arg;
00136     break;
00137   }
00138   return 0;
00139 }
00140 
00141 
00142 int main (int argc, char** argv)
00143 {
00144   // Default command line arguments
00145   struct arguments_t arguments;
00146   arguments.cache_size = 3000000;
00147   arguments.tree = 0;
00148   
00149   // Parses command line options
00150   struct argp_option options[] = {
00151     {"cache-size", 'c', "SIZE",   0,     "Cache size"},
00152     {"tree",       't', "TREE",   0,     "Path to tree"},
00153     { 0 }
00154   };
00155   struct argp argp = { options, parse_opt };
00156   int parse_ok = argp_parse(&argp, argc, argv, 0, 0, &arguments);
00157   printf("Arguments parsed: %d\n", parse_ok);
00158   printf("Cache size: %d\n", arguments.cache_size);
00159   printf("Tree: %s\n", arguments.tree);
00160 
00161   if (!arguments.tree) {
00162     fprintf(stderr, "No tree path given.\n");
00163     return 1;
00164   }
00165 
00166 
00167   boost::shared_ptr<Storage> storage(openStorage(arguments.tree));
00168   MegaTree tree(storage, arguments.cache_size, true);
00169   Analysis analysis;
00170 
00171   Tictoc overall_timer;
00172   NodeHandle root;
00173   tree.getRoot(root);
00174   analyze(tree, root, analysis);
00175   tree.releaseNode(root);
00176   float overall_time = overall_timer.toc();
00177 
00178   analysis.dump();
00179   printf("Walked %'lu points in %.3f seconds (%.1f min or %.1f hours)\n",
00180          tree.getNumPoints(), overall_time, overall_time/60.0f, overall_time/3600.0f);
00181 
00182   
00183   return 0;
00184 }


megatree_cpp
Author(s): Stuart Glaser
autogenerated on Thu Nov 28 2013 11:30:34