tree_builder.h
Go to the documentation of this file.
00001 #ifndef VOCABULARY_TREE_TREE_BUILDER_H
00002 #define VOCABULARY_TREE_TREE_BUILDER_H
00003 
00004 #include "vocabulary_tree/mutable_tree.h"
00005 #include "vocabulary_tree/simple_kmeans.h"
00006 #include <deque>
00007 //#include <cstdio> //DEBUG
00008 
00009 namespace vt {
00010 
00015 template<class Feature, class Distance = distance::L2<Feature>,
00016          class FeatureAllocator = typename DefaultAllocator<Feature>::type>
00017 class TreeBuilder
00018 {
00019 public:
00020   typedef MutableVocabularyTree<Feature, Distance, FeatureAllocator> Tree;
00021   typedef SimpleKmeans<Feature, Distance, FeatureAllocator> Kmeans;
00022   typedef std::vector<Feature, FeatureAllocator> FeatureVector;
00023 
00030   TreeBuilder(const Feature& zero = Feature(), Distance d = Distance());
00031 
00041   void build(const FeatureVector& training_features, uint32_t k, uint32_t levels);
00042 
00044   const Tree& tree() const { return tree_; }
00045 
00047   Kmeans& kmeans() { return kmeans_; }
00049   const Kmeans& kmeans() const { return kmeans_; }
00050 
00051 protected:
00052   Tree tree_;
00053   Kmeans kmeans_;
00054   Feature zero_;
00055 };
00056 
00057 
00058 template<class Feature, class Distance, class FeatureAllocator>
00059 TreeBuilder<Feature, Distance, FeatureAllocator>::TreeBuilder(const Feature& zero, Distance d)
00060   : tree_(d),
00061     kmeans_(zero, d),
00062     zero_(zero)
00063 {
00064 }
00065 
00066 template<class Feature, class Distance, class FeatureAllocator>
00067 void TreeBuilder<Feature, Distance, FeatureAllocator>::build(const FeatureVector& training_features,
00068                                                              uint32_t k, uint32_t levels)
00069 {
00070   // Initial setup and memory allocation for the tree
00071   tree_.clear();
00072   tree_.setSize(levels, k);
00073   tree_.centers().reserve(tree_.nodes());
00074   tree_.validCenters().reserve(tree_.nodes());
00075 
00076   // We keep a queue of disjoint feature subsets to cluster.
00077   // Feature* is used to avoid copying features.
00078   std::deque< std::vector<Feature*> > subset_queue(1);
00079 
00080   // At first the queue contains one "subset" containing all the features.
00081   std::vector<Feature*> &feature_ptrs = subset_queue.front();
00082   feature_ptrs.reserve(training_features.size());
00083   BOOST_FOREACH(const Feature& f, training_features)
00084     feature_ptrs.push_back(const_cast<Feature*>(&f));
00085 
00086   FeatureVector centers; // always size k
00087   for (uint32_t level = 0; level < levels; ++level) {
00088     //printf("# Level %u\n", level);
00089     std::vector<unsigned int> membership;
00090 
00091     for (size_t i = 0, ie = subset_queue.size(); i < ie; ++i) {
00092       std::vector<Feature*> &subset = subset_queue.front();
00093       //printf("#\tClustering subset of size %u\n", subset.size());
00094 
00095       // If the subset already has k or fewer elements, just use those as the centers.
00096       if (subset.size() <= k) {
00097         for (size_t j = 0; j < subset.size(); ++j) {
00098           tree_.centers().push_back(*subset[j]);
00099           tree_.validCenters().push_back(1);
00100         }
00101         // Mark non-existent centers as invalid.
00102         tree_.centers().insert(tree_.centers().end(), k - subset.size(), zero_);
00103         tree_.validCenters().insert(tree_.validCenters().end(), k - subset.size(), 0);
00104 
00105         // Push k empty subsets into the queue so all children get marked invalid.
00106         subset_queue.pop_front();
00107         subset_queue.insert(subset_queue.end(), k, std::vector<Feature*>());
00108       }
00109       else {
00110         // Cluster the current subset into k centers.
00111         kmeans_.clusterPointers(subset, k, centers, membership);
00112 
00113         // Add the centers and mark them as valid.
00114         tree_.centers().insert(tree_.centers().end(), centers.begin(), centers.end());
00115         tree_.validCenters().insert(tree_.validCenters().end(), k, 1);
00116 
00117         // Partition the current subset into k new subsets based on the cluster assignments.
00118         std::vector< std::vector<Feature*> > new_subsets(k);
00119         for (size_t j = 0; j < subset.size(); ++j) {
00120           new_subsets[ membership[j] ].push_back(subset[j]);
00121         }
00122 
00123         // Update the queue
00124         subset_queue.pop_front();
00125         subset_queue.insert(subset_queue.end(), new_subsets.begin(), new_subsets.end());
00126       }
00127     }
00128     //printf("# centers so far = %u\n", tree_.centers().size());
00129   }
00130 }
00131 
00132 } //namespace vt
00133 
00134 #endif


vocabulary_tree
Author(s): Patrick Mihelich
autogenerated on Thu Jan 2 2014 12:12:26