database.h
Go to the documentation of this file.
00001 #ifndef VOCABULARY_TREE_DATABASE_H
00002 #define VOCABULARY_TREE_DATABASE_H
00003 
00005 #include <vocabulary_tree/vocabulary_tree.h>
00006 #include <map>
00007 
00008 namespace vt {
00009 
/// Identifier type for documents; Database::insert() returns one of these.
typedef uint32_t DocId;

/**
 * \brief A single match result: a document id paired with a score.
 *
 * NOTE(review): whether a lower or a higher score means a better match is not
 * visible in this header; operator< simply orders by ascending score.
 */
struct Match
{
  DocId id;    ///< Id of the matched document.
  float score; ///< Score assigned to the match.

  /// Zero-initializes both members so a default-constructed Match never
  /// exposes indeterminate values.
  Match() : id(0), score(0.0f) {}

  /// Builds a match from an explicit document id and score.
  Match(DocId _id, float _score) : id(_id), score(_score) {}

  /// Orders matches by ascending score (usable with std::sort).
  bool operator<(const Match& other) const
  {
    return score < other.score;
  }
};
00031 
00032 // TODO: remove these typedefs; they only make the generated docs more confusing.
00033 typedef std::vector<Word> Document; // A document expressed as a list of words.
00034 typedef std::vector<Match> Matches; // A list of Match results.
00035 
00040 class Database
00041 {
00042 public:
00049   Database(uint32_t num_words = 0);
00050 
00057   DocId insert(const std::vector<Word>& document);
00058 
00066   void find(const std::vector<Word>& document, size_t N, std::vector<Match>& matches) const;
00067 
00077   DocId findAndInsert(const std::vector<Word>& document, size_t N, std::vector<Match>& matches);
00078 
00085   void computeTfIdfWeights(float default_weight = 1.0f);
00086 
00088   void saveWeights(const std::string& file) const;
00090   void loadWeights(const std::string& file);
00091 
00092   // Save weights and documents
00093   //void save(const std::string& file) const;
00094   //void load(const std::string& file);
00095 
00096 private:
00097   struct WordFrequency
00098   {
00099     DocId id;
00100     uint32_t count;
00101 
00102     WordFrequency(DocId _id, uint32_t _count) : id(_id), count(_count) {}
00103   };
00104   
00105   // Stored in increasing order by DocId
00106   typedef std::vector<WordFrequency> InvertedFile;
00107 
00109   // typedef std::vector< std::pair<Word, float> > DocumentVector;
00110   typedef std::map<Word, float> DocumentVector;
00111 
00112   std::vector<InvertedFile> word_files_;
00113   std::vector<float> word_weights_;
00114   std::vector<DocumentVector> database_vectors_; // Precomputed for inserted documents
00115 
00116   void computeVector(const std::vector<Word>& document, DocumentVector& v) const;
00117   
00118   static void normalize(DocumentVector& v);
00119   static float sparseDistance(const DocumentVector& v1, const DocumentVector& v2);
00120 };
00121 
00122 } //namespace vt
00123 
00124 #endif


vocabulary_tree
Author(s): Patrick Mihelich
autogenerated on Thu Jan 2 2014 12:12:26