00001 #ifndef VOCABULARY_TREE_TREE_BUILDER_H
00002 #define VOCABULARY_TREE_TREE_BUILDER_H
00003
00004 #include "vocabulary_tree/mutable_tree.h"
00005 #include "vocabulary_tree/simple_kmeans.h"
00006 #include <deque>
00007
00008
00009 namespace vt {
00010
00015 template<class Feature, class Distance = distance::L2<Feature>,
00016 class FeatureAllocator = typename DefaultAllocator<Feature>::type>
00017 class TreeBuilder
00018 {
00019 public:
00020 typedef MutableVocabularyTree<Feature, Distance, FeatureAllocator> Tree;
00021 typedef SimpleKmeans<Feature, Distance, FeatureAllocator> Kmeans;
00022 typedef std::vector<Feature, FeatureAllocator> FeatureVector;
00023
00030 TreeBuilder(const Feature& zero = Feature(), Distance d = Distance());
00031
00041 void build(const FeatureVector& training_features, uint32_t k, uint32_t levels);
00042
00044 const Tree& tree() const { return tree_; }
00045
00047 Kmeans& kmeans() { return kmeans_; }
00049 const Kmeans& kmeans() const { return kmeans_; }
00050
00051 protected:
00052 Tree tree_;
00053 Kmeans kmeans_;
00054 Feature zero_;
00055 };
00056
00057
00058 template<class Feature, class Distance, class FeatureAllocator>
00059 TreeBuilder<Feature, Distance, FeatureAllocator>::TreeBuilder(const Feature& zero, Distance d)
00060 : tree_(d),
00061 kmeans_(zero, d),
00062 zero_(zero)
00063 {
00064 }
00065
00066 template<class Feature, class Distance, class FeatureAllocator>
00067 void TreeBuilder<Feature, Distance, FeatureAllocator>::build(const FeatureVector& training_features,
00068 uint32_t k, uint32_t levels)
00069 {
00070
00071 tree_.clear();
00072 tree_.setSize(levels, k);
00073 tree_.centers().reserve(tree_.nodes());
00074 tree_.validCenters().reserve(tree_.nodes());
00075
00076
00077
00078 std::deque< std::vector<Feature*> > subset_queue(1);
00079
00080
00081 std::vector<Feature*> &feature_ptrs = subset_queue.front();
00082 feature_ptrs.reserve(training_features.size());
00083 BOOST_FOREACH(const Feature& f, training_features)
00084 feature_ptrs.push_back(const_cast<Feature*>(&f));
00085
00086 FeatureVector centers;
00087 for (uint32_t level = 0; level < levels; ++level) {
00088
00089 std::vector<unsigned int> membership;
00090
00091 for (size_t i = 0, ie = subset_queue.size(); i < ie; ++i) {
00092 std::vector<Feature*> &subset = subset_queue.front();
00093
00094
00095
00096 if (subset.size() <= k) {
00097 for (size_t j = 0; j < subset.size(); ++j) {
00098 tree_.centers().push_back(*subset[j]);
00099 tree_.validCenters().push_back(1);
00100 }
00101
00102 tree_.centers().insert(tree_.centers().end(), k - subset.size(), zero_);
00103 tree_.validCenters().insert(tree_.validCenters().end(), k - subset.size(), 0);
00104
00105
00106 subset_queue.pop_front();
00107 subset_queue.insert(subset_queue.end(), k, std::vector<Feature*>());
00108 }
00109 else {
00110
00111 kmeans_.clusterPointers(subset, k, centers, membership);
00112
00113
00114 tree_.centers().insert(tree_.centers().end(), centers.begin(), centers.end());
00115 tree_.validCenters().insert(tree_.validCenters().end(), k, 1);
00116
00117
00118 std::vector< std::vector<Feature*> > new_subsets(k);
00119 for (size_t j = 0; j < subset.size(); ++j) {
00120 new_subsets[ membership[j] ].push_back(subset[j]);
00121 }
00122
00123
00124 subset_queue.pop_front();
00125 subset_queue.insert(subset_queue.end(), new_subsets.begin(), new_subsets.end());
00126 }
00127 }
00128
00129 }
00130 }
00131
00132 }
00133
00134 #endif