TemplatedVocabulary.h
Go to the documentation of this file.
1 
17 #ifndef __D_T_TEMPLATED_VOCABULARY__
18 #define __D_T_TEMPLATED_VOCABULARY__
19 
20 #include <cassert>
21 
22 #include <vector>
23 #include <numeric>
24 #include <fstream>
25 #include <string>
26 #include <algorithm>
27 #include <opencv2/core/core.hpp>
28 #include <limits>
29 
30 #include "FeatureVector.h"
31 #include "BowVector.h"
32 #include "ScoringObject.h"
33 
34 #include "../DUtils/Random.h"
35 
36 using namespace std;
37 
38 namespace DBoW2 {
39 
42 template<class TDescriptor, class F>
45 {
46 public:
47 
55  TemplatedVocabulary(int k = 10, int L = 5,
56  WeightingType weighting = TF_IDF, ScoringType scoring = L1_NORM);
57 
62  TemplatedVocabulary(const std::string &filename);
63 
68  TemplatedVocabulary(const char *filename);
69 
75 
79  virtual ~TemplatedVocabulary();
80 
89 
95  virtual void create
96  (const std::vector<std::vector<TDescriptor> > &training_features);
97 
105  virtual void create
106  (const std::vector<std::vector<TDescriptor> > &training_features,
107  int k, int L);
108 
114  virtual void create
115  (const std::vector<std::vector<TDescriptor> > &training_features,
116  int k, int L, WeightingType weighting, ScoringType scoring);
117 
122  virtual inline unsigned int size() const;
123 
128  virtual inline bool empty() const;
129 
135  virtual void transform(const std::vector<TDescriptor>& features, BowVector &v)
136  const;
137 
145  virtual void transform(const std::vector<TDescriptor>& features,
146  BowVector &v, FeatureVector &fv, int levelsup) const;
147 
153  virtual WordId transform(const TDescriptor& feature) const;
154 
162  inline double score(const BowVector &a, const BowVector &b) const;
163 
171  virtual NodeId getParentNode(WordId wid, int levelsup) const;
172 
179  void getWordsFromNode(NodeId nid, std::vector<WordId> &words) const;
180 
185  inline int getBranchingFactor() const { return m_k; }
186 
191  inline int getDepthLevels() const { return m_L; }
192 
197  float getEffectiveLevels() const;
198 
204  virtual inline TDescriptor getWord(WordId wid) const;
205 
211  virtual inline WordValue getWordWeight(WordId wid) const;
212 
217  inline WeightingType getWeightingType() const { return m_weighting; }
218 
223  inline ScoringType getScoringType() const { return m_scoring; }
224 
229  inline void setWeightingType(WeightingType type);
230 
235  void setScoringType(ScoringType type);
236 
241  bool loadFromTextFile(const std::string &filename);
242 
247  void saveToTextFile(const std::string &filename) const;
248 
253  void save(const std::string &filename) const;
254 
259  void load(const std::string &filename);
260 
265  virtual void save(cv::FileStorage &fs,
266  const std::string &name = "vocabulary") const;
267 
274  virtual void load(const cv::FileStorage &fs,
275  const std::string &name = "vocabulary");
276 
289  virtual int stopWords(double minWeight);
290 
291 protected:
292 
294  typedef const TDescriptor *pDescriptor;
295 
297  struct Node
298  {
304  vector<NodeId> children;
308  TDescriptor descriptor;
309 
312 
316  Node(): id(0), weight(0), parent(0), word_id(0){}
317 
322  Node(NodeId _id): id(_id), weight(0), parent(0), word_id(0){}
323 
328  inline bool isLeaf() const { return children.empty(); }
329  };
330 
331 protected:
332 
336  void createScoringObject();
337 
343  void getFeatures(
344  const vector<vector<TDescriptor> > &training_features,
345  vector<pDescriptor> &features) const;
346 
355  virtual void transform(const TDescriptor &feature,
356  WordId &id, WordValue &weight, NodeId* nid = NULL, int levelsup = 0) const;
357 
363  virtual void transform(const TDescriptor &feature, WordId &id) const;
364 
372  void HKmeansStep(NodeId parent_id, const vector<pDescriptor> &descriptors,
373  int current_level);
374 
380  virtual void initiateClusters(const vector<pDescriptor> &descriptors,
381  vector<TDescriptor> &clusters) const;
382 
389  void initiateClustersKMpp(const vector<pDescriptor> &descriptors,
390  vector<TDescriptor> &clusters) const;
391 
395  void createWords();
396 
403  void setNodeWeights(const vector<vector<TDescriptor> > &features);
404 
405 protected:
406 
408  int m_k;
409 
411  int m_L;
412 
415 
418 
421 
423  std::vector<Node> m_nodes;
424 
427  std::vector<Node*> m_words;
428 
429 };
430 
431 // --------------------------------------------------------------------------
432 
433 template<class TDescriptor, class F>
435  (int k, int L, WeightingType weighting, ScoringType scoring)
436  : m_k(k), m_L(L), m_weighting(weighting), m_scoring(scoring),
437  m_scoring_object(NULL)
438 {
439  createScoringObject();
440 }
441 
442 // --------------------------------------------------------------------------
443 
444 template<class TDescriptor, class F>
446  (const std::string &filename): m_scoring_object(NULL)
447 {
448  load(filename);
449 }
450 
451 // --------------------------------------------------------------------------
452 
453 template<class TDescriptor, class F>
455  (const char *filename): m_scoring_object(NULL)
456 {
457  load(filename);
458 }
459 
460 // --------------------------------------------------------------------------
461 
462 template<class TDescriptor, class F>
464 {
465  delete m_scoring_object;
466  m_scoring_object = NULL;
467 
468  switch(m_scoring)
469  {
470  case L1_NORM:
471  m_scoring_object = new L1Scoring;
472  break;
473 
474  case L2_NORM:
475  m_scoring_object = new L2Scoring;
476  break;
477 
478  case CHI_SQUARE:
479  m_scoring_object = new ChiSquareScoring;
480  break;
481 
482  case KL:
483  m_scoring_object = new KLScoring;
484  break;
485 
486  case BHATTACHARYYA:
487  m_scoring_object = new BhattacharyyaScoring;
488  break;
489 
490  case DOT_PRODUCT:
491  m_scoring_object = new DotProductScoring;
492  break;
493 
494  }
495 }
496 
497 // --------------------------------------------------------------------------
498 
499 template<class TDescriptor, class F>
501 {
502  m_scoring = type;
503  createScoringObject();
504 }
505 
506 // --------------------------------------------------------------------------
507 
508 template<class TDescriptor, class F>
510 {
511  this->m_weighting = type;
512 }
513 
514 // --------------------------------------------------------------------------
515 
516 template<class TDescriptor, class F>
519  : m_scoring_object(NULL)
520 {
521  *this = voc;
522 }
523 
524 // --------------------------------------------------------------------------
525 
526 template<class TDescriptor, class F>
528 {
529  delete m_scoring_object;
530 }
531 
532 // --------------------------------------------------------------------------
533 
534 template<class TDescriptor, class F>
538 {
539  this->m_k = voc.m_k;
540  this->m_L = voc.m_L;
541  this->m_scoring = voc.m_scoring;
542  this->m_weighting = voc.m_weighting;
543 
544  this->createScoringObject();
545 
546  this->m_nodes.clear();
547  this->m_words.clear();
548 
549  this->m_nodes = voc.m_nodes;
550  this->createWords();
551 
552  return *this;
553 }
554 
555 // --------------------------------------------------------------------------
556 
557 template<class TDescriptor, class F>
559  const std::vector<std::vector<TDescriptor> > &training_features)
560 {
561  m_nodes.clear();
562  m_words.clear();
563 
564  // expected_nodes = Sum_{i=0..L} ( k^i )
565  int expected_nodes =
566  (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
567 
568  m_nodes.reserve(expected_nodes); // avoid allocations when creating the tree
569 
570 
571  vector<pDescriptor> features;
572  getFeatures(training_features, features);
573 
574 
575  // create root
576  m_nodes.push_back(Node(0)); // root
577 
578  // create the tree
579  HKmeansStep(0, features, 1);
580 
581  // create the words
582  createWords();
583 
584  // and set the weight of each node of the tree
585  setNodeWeights(training_features);
586 
587 }
588 
589 // --------------------------------------------------------------------------
590 
591 template<class TDescriptor, class F>
593  const std::vector<std::vector<TDescriptor> > &training_features,
594  int k, int L)
595 {
596  m_k = k;
597  m_L = L;
598 
599  create(training_features);
600 }
601 
602 // --------------------------------------------------------------------------
603 
604 template<class TDescriptor, class F>
606  const std::vector<std::vector<TDescriptor> > &training_features,
607  int k, int L, WeightingType weighting, ScoringType scoring)
608 {
609  m_k = k;
610  m_L = L;
611  m_weighting = weighting;
612  m_scoring = scoring;
614 
615  create(training_features);
616 }
617 
618 // --------------------------------------------------------------------------
619 
620 template<class TDescriptor, class F>
622  const vector<vector<TDescriptor> > &training_features,
623  vector<pDescriptor> &features) const
624 {
625  features.resize(0);
626 
627  typename vector<vector<TDescriptor> >::const_iterator vvit;
628  typename vector<TDescriptor>::const_iterator vit;
629  for(vvit = training_features.begin(); vvit != training_features.end(); ++vvit)
630  {
631  features.reserve(features.size() + vvit->size());
632  for(vit = vvit->begin(); vit != vvit->end(); ++vit)
633  {
634  features.push_back(&(*vit));
635  }
636  }
637 }
638 
639 // --------------------------------------------------------------------------
640 
641 template<class TDescriptor, class F>
643  const vector<pDescriptor> &descriptors, int current_level)
644 {
645  if(descriptors.empty()) return;
646 
647  // features associated to each cluster
648  vector<TDescriptor> clusters;
649  vector<vector<unsigned int> > groups; // groups[i] = [j1, j2, ...]
650  // j1, j2, ... indices of descriptors associated to cluster i
651 
652  clusters.reserve(m_k);
653  groups.reserve(m_k);
654 
655  //const int msizes[] = { m_k, descriptors.size() };
656  //cv::SparseMat assoc(2, msizes, CV_8U);
657  //cv::SparseMat last_assoc(2, msizes, CV_8U);
659 
660  if((int)descriptors.size() <= m_k)
661  {
662  // trivial case: one cluster per feature
663  groups.resize(descriptors.size());
664 
665  for(unsigned int i = 0; i < descriptors.size(); i++)
666  {
667  groups[i].push_back(i);
668  clusters.push_back(*descriptors[i]);
669  }
670  }
671  else
672  {
673  // select clusters and groups with kmeans
674 
675  bool first_time = true;
676  bool goon = true;
677 
678  // to check if clusters move after iterations
679  vector<int> last_association, current_association;
680 
681  while(goon)
682  {
683  // 1. Calculate clusters
684 
685  if(first_time)
686  {
687  // random sample
688  initiateClusters(descriptors, clusters);
689  }
690  else
691  {
692  // calculate cluster centres
693 
694  for(unsigned int c = 0; c < clusters.size(); ++c)
695  {
696  vector<pDescriptor> cluster_descriptors;
697  cluster_descriptors.reserve(groups[c].size());
698 
699  /*
700  for(unsigned int d = 0; d < descriptors.size(); ++d)
701  {
702  if( assoc.find<unsigned char>(c, d) )
703  {
704  cluster_descriptors.push_back(descriptors[d]);
705  }
706  }
707  */
708 
709  vector<unsigned int>::const_iterator vit;
710  for(vit = groups[c].begin(); vit != groups[c].end(); ++vit)
711  {
712  cluster_descriptors.push_back(descriptors[*vit]);
713  }
714 
715 
716  F::meanValue(cluster_descriptors, clusters[c]);
717  }
718 
719  } // if(!first_time)
720 
721  // 2. Associate features with clusters
722 
723  // calculate distances to cluster centers
724  groups.clear();
725  groups.resize(clusters.size(), vector<unsigned int>());
726  current_association.resize(descriptors.size());
727 
728  //assoc.clear();
729 
730  typename vector<pDescriptor>::const_iterator fit;
731  //unsigned int d = 0;
732  for(fit = descriptors.begin(); fit != descriptors.end(); ++fit)//, ++d)
733  {
734  double best_dist = F::distance(*(*fit), clusters[0]);
735  unsigned int icluster = 0;
736 
737  for(unsigned int c = 1; c < clusters.size(); ++c)
738  {
739  double dist = F::distance(*(*fit), clusters[c]);
740  if(dist < best_dist)
741  {
742  best_dist = dist;
743  icluster = c;
744  }
745  }
746 
747  //assoc.ref<unsigned char>(icluster, d) = 1;
748 
749  groups[icluster].push_back(fit - descriptors.begin());
750  current_association[ fit - descriptors.begin() ] = icluster;
751  }
752 
753  // kmeans++ ensures all the clusters has any feature associated with them
754 
755  // 3. check convergence
756  if(first_time)
757  {
758  first_time = false;
759  }
760  else
761  {
762  //goon = !eqUChar(last_assoc, assoc);
763 
764  goon = false;
765  for(unsigned int i = 0; i < current_association.size(); i++)
766  {
767  if(current_association[i] != last_association[i]){
768  goon = true;
769  break;
770  }
771  }
772  }
773 
774  if(goon)
775  {
776  // copy last feature-cluster association
777  last_association = current_association;
778  //last_assoc = assoc.clone();
779  }
780 
781  } // while(goon)
782 
783  } // if must run kmeans
784 
785  // create nodes
786  for(unsigned int i = 0; i < clusters.size(); ++i)
787  {
788  NodeId id = m_nodes.size();
789  m_nodes.push_back(Node(id));
790  m_nodes.back().descriptor = clusters[i];
791  m_nodes.back().parent = parent_id;
792  m_nodes[parent_id].children.push_back(id);
793  }
794 
795  // go on with the next level
796  if(current_level < m_L)
797  {
798  // iterate again with the resulting clusters
799  const vector<NodeId> &children_ids = m_nodes[parent_id].children;
800  for(unsigned int i = 0; i < clusters.size(); ++i)
801  {
802  NodeId id = children_ids[i];
803 
804  vector<pDescriptor> child_features;
805  child_features.reserve(groups[i].size());
806 
807  vector<unsigned int>::const_iterator vit;
808  for(vit = groups[i].begin(); vit != groups[i].end(); ++vit)
809  {
810  child_features.push_back(descriptors[*vit]);
811  }
812 
813  if(child_features.size() > 1)
814  {
815  HKmeansStep(id, child_features, current_level + 1);
816  }
817  }
818  }
819 }
820 
821 // --------------------------------------------------------------------------
822 
823 template<class TDescriptor, class F>
825  (const vector<pDescriptor> &descriptors, vector<TDescriptor> &clusters) const
826 {
827  initiateClustersKMpp(descriptors, clusters);
828 }
829 
830 // --------------------------------------------------------------------------
831 
832 template<class TDescriptor, class F>
834  const vector<pDescriptor> &pfeatures, vector<TDescriptor> &clusters) const
835 {
836  // Implements kmeans++ seeding algorithm
837  // Algorithm:
838  // 1. Choose one center uniformly at random from among the data points.
839  // 2. For each data point x, compute D(x), the distance between x and the nearest
840  // center that has already been chosen.
841  // 3. Add one new data point as a center. Each point x is chosen with probability
842  // proportional to D(x)^2.
843  // 4. Repeat Steps 2 and 3 until k centers have been chosen.
844  // 5. Now that the initial centers have been chosen, proceed using standard k-means
845  // clustering.
846 
848 
849  clusters.resize(0);
850  clusters.reserve(m_k);
851  vector<double> min_dists(pfeatures.size(), std::numeric_limits<double>::max());
852 
853  // 1.
854 
855  int ifeature = DUtils::Random::RandomInt(0, pfeatures.size()-1);
856 
857  // create first cluster
858  clusters.push_back(*pfeatures[ifeature]);
859 
860  // compute the initial distances
861  typename vector<pDescriptor>::const_iterator fit;
862  vector<double>::iterator dit;
863  dit = min_dists.begin();
864  for(fit = pfeatures.begin(); fit != pfeatures.end(); ++fit, ++dit)
865  {
866  *dit = F::distance(*(*fit), clusters.back());
867  }
868 
869  while((int)clusters.size() < m_k)
870  {
871  // 2.
872  dit = min_dists.begin();
873  for(fit = pfeatures.begin(); fit != pfeatures.end(); ++fit, ++dit)
874  {
875  if(*dit > 0)
876  {
877  double dist = F::distance(*(*fit), clusters.back());
878  if(dist < *dit) *dit = dist;
879  }
880  }
881 
882  // 3.
883  double dist_sum = std::accumulate(min_dists.begin(), min_dists.end(), 0.0);
884 
885  if(dist_sum > 0)
886  {
887  double cut_d;
888  do
889  {
890  cut_d = DUtils::Random::RandomValue<double>(0, dist_sum);
891  } while(cut_d == 0.0);
892 
893  double d_up_now = 0;
894  for(dit = min_dists.begin(); dit != min_dists.end(); ++dit)
895  {
896  d_up_now += *dit;
897  if(d_up_now >= cut_d) break;
898  }
899 
900  if(dit == min_dists.end())
901  ifeature = pfeatures.size()-1;
902  else
903  ifeature = dit - min_dists.begin();
904 
905  clusters.push_back(*pfeatures[ifeature]);
906 
907  } // if dist_sum > 0
908  else
909  break;
910 
911  } // while(used_clusters < m_k)
912 
913 }
914 
915 // --------------------------------------------------------------------------
916 
917 template<class TDescriptor, class F>
919 {
920  m_words.resize(0);
921 
922  if(!m_nodes.empty())
923  {
924  m_words.reserve( (int)pow((double)m_k, (double)m_L) );
925 
926  typename vector<Node>::iterator nit;
927 
928  nit = m_nodes.begin(); // ignore root
929  for(++nit; nit != m_nodes.end(); ++nit)
930  {
931  if(nit->isLeaf())
932  {
933  nit->word_id = m_words.size();
934  m_words.push_back( &(*nit) );
935  }
936  }
937  }
938 }
939 
940 // --------------------------------------------------------------------------
941 
942 template<class TDescriptor, class F>
944  (const vector<vector<TDescriptor> > &training_features)
945 {
946  const unsigned int NWords = m_words.size();
947  const unsigned int NDocs = training_features.size();
948 
949  if(m_weighting == TF || m_weighting == BINARY)
950  {
951  // idf part must be 1 always
952  for(unsigned int i = 0; i < NWords; i++)
953  m_words[i]->weight = 1;
954  }
955  else if(m_weighting == IDF || m_weighting == TF_IDF)
956  {
957  // IDF and TF-IDF: we calculte the idf path now
958 
959  // Note: this actually calculates the idf part of the tf-idf score.
960  // The complete tf-idf score is calculated in ::transform
961 
962  vector<unsigned int> Ni(NWords, 0);
963  vector<bool> counted(NWords, false);
964 
965  typename vector<vector<TDescriptor> >::const_iterator mit;
966  typename vector<TDescriptor>::const_iterator fit;
967 
968  for(mit = training_features.begin(); mit != training_features.end(); ++mit)
969  {
970  fill(counted.begin(), counted.end(), false);
971 
972  for(fit = mit->begin(); fit < mit->end(); ++fit)
973  {
974  WordId word_id;
975  transform(*fit, word_id);
976 
977  if(!counted[word_id])
978  {
979  Ni[word_id]++;
980  counted[word_id] = true;
981  }
982  }
983  }
984 
985  // set ln(N/Ni)
986  for(unsigned int i = 0; i < NWords; i++)
987  {
988  if(Ni[i] > 0)
989  {
990  m_words[i]->weight = log((double)NDocs / (double)Ni[i]);
991  }// else // This cannot occur if using kmeans++
992  }
993 
994  }
995 
996 }
997 
998 // --------------------------------------------------------------------------
999 
1000 template<class TDescriptor, class F>
1001 inline unsigned int TemplatedVocabulary<TDescriptor,F>::size() const
1002 {
1003  return m_words.size();
1004 }
1005 
1006 // --------------------------------------------------------------------------
1007 
1008 template<class TDescriptor, class F>
1010 {
1011  return m_words.empty();
1012 }
1013 
1014 // --------------------------------------------------------------------------
1015 
1016 template<class TDescriptor, class F>
1018 {
1019  long sum = 0;
1020  typename std::vector<Node*>::const_iterator wit;
1021  for(wit = m_words.begin(); wit != m_words.end(); ++wit)
1022  {
1023  const Node *p = *wit;
1024 
1025  for(; p->id != 0; sum++) p = &m_nodes[p->parent];
1026  }
1027 
1028  return (float)((double)sum / (double)m_words.size());
1029 }
1030 
1031 // --------------------------------------------------------------------------
1032 
1033 template<class TDescriptor, class F>
1035 {
1036  return m_words[wid]->descriptor;
1037 }
1038 
1039 // --------------------------------------------------------------------------
1040 
1041 template<class TDescriptor, class F>
1043 {
1044  return m_words[wid]->weight;
1045 }
1046 
1047 // --------------------------------------------------------------------------
1048 
1049 template<class TDescriptor, class F>
1051  (const TDescriptor& feature) const
1052 {
1053  if(empty())
1054  {
1055  return 0;
1056  }
1057 
1058  WordId wid;
1059  transform(feature, wid);
1060  return wid;
1061 }
1062 
1063 // --------------------------------------------------------------------------
1064 
1065 template<class TDescriptor, class F>
1067  const std::vector<TDescriptor>& features, BowVector &v) const
1068 {
1069  v.clear();
1070 
1071  if(empty())
1072  {
1073  return;
1074  }
1075 
1076  // normalize
1077  LNorm norm;
1078  bool must = m_scoring_object->mustNormalize(norm);
1079 
1080  typename vector<TDescriptor>::const_iterator fit;
1081 
1082  if(m_weighting == TF || m_weighting == TF_IDF)
1083  {
1084  for(fit = features.begin(); fit < features.end(); ++fit)
1085  {
1086  WordId id;
1087  WordValue w;
1088  // w is the idf value if TF_IDF, 1 if TF
1089 
1090  transform(*fit, id, w);
1091 
1092  // not stopped
1093  if(w > 0) v.addWeight(id, w);
1094  }
1095 
1096  if(!v.empty() && !must)
1097  {
1098  // unnecessary when normalizing
1099  const double nd = v.size();
1100  for(BowVector::iterator vit = v.begin(); vit != v.end(); vit++)
1101  vit->second /= nd;
1102  }
1103 
1104  }
1105  else // IDF || BINARY
1106  {
1107  for(fit = features.begin(); fit < features.end(); ++fit)
1108  {
1109  WordId id;
1110  WordValue w;
1111  // w is idf if IDF, or 1 if BINARY
1112 
1113  transform(*fit, id, w);
1114 
1115  // not stopped
1116  if(w > 0) v.addIfNotExist(id, w);
1117 
1118  } // if add_features
1119  } // if m_weighting == ...
1120 
1121  if(must) v.normalize(norm);
1122 }
1123 
1124 // --------------------------------------------------------------------------
1125 
1126 template<class TDescriptor, class F>
1128  const std::vector<TDescriptor>& features,
1129  BowVector &v, FeatureVector &fv, int levelsup) const
1130 {
1131  v.clear();
1132  fv.clear();
1133 
1134  if(empty()) // safe for subclasses
1135  {
1136  return;
1137  }
1138 
1139  // normalize
1140  LNorm norm;
1141  bool must = m_scoring_object->mustNormalize(norm);
1142 
1143  typename vector<TDescriptor>::const_iterator fit;
1144 
1145  if(m_weighting == TF || m_weighting == TF_IDF)
1146  {
1147  unsigned int i_feature = 0;
1148  for(fit = features.begin(); fit < features.end(); ++fit, ++i_feature)
1149  {
1150  WordId id;
1151  NodeId nid;
1152  WordValue w;
1153  // w is the idf value if TF_IDF, 1 if TF
1154 
1155  transform(*fit, id, w, &nid, levelsup);
1156 
1157  if(w > 0) // not stopped
1158  {
1159  v.addWeight(id, w);
1160  fv.addFeature(nid, i_feature);
1161  }
1162  }
1163 
1164  if(!v.empty() && !must)
1165  {
1166  // unnecessary when normalizing
1167  const double nd = v.size();
1168  for(BowVector::iterator vit = v.begin(); vit != v.end(); vit++)
1169  vit->second /= nd;
1170  }
1171 
1172  }
1173  else // IDF || BINARY
1174  {
1175  unsigned int i_feature = 0;
1176  for(fit = features.begin(); fit < features.end(); ++fit, ++i_feature)
1177  {
1178  WordId id;
1179  NodeId nid;
1180  WordValue w;
1181  // w is idf if IDF, or 1 if BINARY
1182 
1183  transform(*fit, id, w, &nid, levelsup);
1184 
1185  if(w > 0) // not stopped
1186  {
1187  v.addIfNotExist(id, w);
1188  fv.addFeature(nid, i_feature);
1189  }
1190  }
1191  } // if m_weighting == ...
1192 
1193  if(must) v.normalize(norm);
1194 }
1195 
1196 // --------------------------------------------------------------------------
1197 
1198 template<class TDescriptor, class F>
1200  (const BowVector &v1, const BowVector &v2) const
1201 {
1202  return m_scoring_object->score(v1, v2);
1203 }
1204 
1205 // --------------------------------------------------------------------------
1206 
1207 template<class TDescriptor, class F>
1209  (const TDescriptor &feature, WordId &id) const
1210 {
1211  WordValue weight;
1212  transform(feature, id, weight);
1213 }
1214 
1215 // --------------------------------------------------------------------------
1216 
1217 template<class TDescriptor, class F>
1218 void TemplatedVocabulary<TDescriptor,F>::transform(const TDescriptor &feature,
1219  WordId &word_id, WordValue &weight, NodeId *nid, int levelsup) const
1220 {
1221  // propagate the feature down the tree
1222  vector<NodeId> nodes;
1223  typename vector<NodeId>::const_iterator nit;
1224 
1225  // level at which the node must be stored in nid, if given
1226  const int nid_level = m_L - levelsup;
1227  if(nid_level <= 0 && nid != NULL) *nid = 0; // root
1228 
1229  NodeId final_id = 0; // root
1230  int current_level = 0;
1231 
1232  do
1233  {
1234  ++current_level;
1235  nodes = m_nodes[final_id].children;
1236  final_id = nodes[0];
1237 
1238  double best_d = F::distance(feature, m_nodes[final_id].descriptor);
1239 
1240  for(nit = nodes.begin() + 1; nit != nodes.end(); ++nit)
1241  {
1242  NodeId id = *nit;
1243  double d = F::distance(feature, m_nodes[id].descriptor);
1244  if(d < best_d)
1245  {
1246  best_d = d;
1247  final_id = id;
1248  }
1249  }
1250 
1251  if(nid != NULL && current_level == nid_level)
1252  *nid = final_id;
1253 
1254  } while( !m_nodes[final_id].isLeaf() );
1255 
1256  // turn node id into word id
1257  word_id = m_nodes[final_id].word_id;
1258  weight = m_nodes[final_id].weight;
1259 }
1260 
1261 // --------------------------------------------------------------------------
1262 
1263 template<class TDescriptor, class F>
1265  (WordId wid, int levelsup) const
1266 {
1267  NodeId ret = m_words[wid]->id; // node id
1268  while(levelsup > 0 && ret != 0) // ret == 0 --> root
1269  {
1270  --levelsup;
1271  ret = m_nodes[ret].parent;
1272  }
1273  return ret;
1274 }
1275 
1276 // --------------------------------------------------------------------------
1277 
1278 template<class TDescriptor, class F>
1280  (NodeId nid, std::vector<WordId> &words) const
1281 {
1282  words.clear();
1283 
1284  if(m_nodes[nid].isLeaf())
1285  {
1286  words.push_back(m_nodes[nid].word_id);
1287  }
1288  else
1289  {
1290  words.reserve(m_k); // ^1, ^2, ...
1291 
1292  vector<NodeId> parents;
1293  parents.push_back(nid);
1294 
1295  while(!parents.empty())
1296  {
1297  NodeId parentid = parents.back();
1298  parents.pop_back();
1299 
1300  const vector<NodeId> &child_ids = m_nodes[parentid].children;
1301  vector<NodeId>::const_iterator cit;
1302 
1303  for(cit = child_ids.begin(); cit != child_ids.end(); ++cit)
1304  {
1305  const Node &child_node = m_nodes[*cit];
1306 
1307  if(child_node.isLeaf())
1308  words.push_back(child_node.word_id);
1309  else
1310  parents.push_back(*cit);
1311 
1312  } // for each child
1313  } // while !parents.empty
1314  }
1315 }
1316 
1317 // --------------------------------------------------------------------------
1318 
1319 template<class TDescriptor, class F>
1321 {
1322  int c = 0;
1323  typename vector<Node*>::iterator wit;
1324  for(wit = m_words.begin(); wit != m_words.end(); ++wit)
1325  {
1326  if((*wit)->weight < minWeight)
1327  {
1328  ++c;
1329  (*wit)->weight = 0;
1330  }
1331  }
1332  return c;
1333 }
1334 
1335 // --------------------------------------------------------------------------
1336 
1337 template<class TDescriptor, class F>
1339 {
1340  ifstream f;
1341  f.open(filename.c_str());
1342 
1343  if(f.eof())
1344  return false;
1345 
1346  m_words.clear();
1347  m_nodes.clear();
1348 
1349  string s;
1350  getline(f,s);
1351  stringstream ss;
1352  ss << s;
1353  ss >> m_k;
1354  ss >> m_L;
1355  int n1, n2;
1356  ss >> n1;
1357  ss >> n2;
1358 
1359  if(m_k<0 || m_k>20 || m_L<1 || m_L>10 || n1<0 || n1>5 || n2<0 || n2>3)
1360  {
1361  std::cerr << "Vocabulary loading failure: This is not a correct text file!" << endl;
1362  return false;
1363  }
1364 
1365  m_scoring = (ScoringType)n1;
1366  m_weighting = (WeightingType)n2;
1368 
1369  // nodes
1370  int expected_nodes =
1371  (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
1372  m_nodes.reserve(expected_nodes);
1373 
1374  m_words.reserve(pow((double)m_k, (double)m_L + 1));
1375 
1376  m_nodes.resize(1);
1377  m_nodes[0].id = 0;
1378  while(!f.eof())
1379  {
1380  string snode;
1381  getline(f,snode);
1382  stringstream ssnode;
1383  ssnode << snode;
1384 
1385  int nid = m_nodes.size();
1386  m_nodes.resize(m_nodes.size()+1);
1387  m_nodes[nid].id = nid;
1388 
1389  int pid ;
1390  ssnode >> pid;
1391  m_nodes[nid].parent = pid;
1392  m_nodes[pid].children.push_back(nid);
1393 
1394  int nIsLeaf;
1395  ssnode >> nIsLeaf;
1396 
1397  stringstream ssd;
1398  for(int iD=0;iD<F::L;iD++)
1399  {
1400  string sElement;
1401  ssnode >> sElement;
1402  ssd << sElement << " ";
1403  }
1404  F::fromString(m_nodes[nid].descriptor, ssd.str());
1405 
1406  ssnode >> m_nodes[nid].weight;
1407 
1408  if(nIsLeaf>0)
1409  {
1410  int wid = m_words.size();
1411  m_words.resize(wid+1);
1412 
1413  m_nodes[nid].word_id = wid;
1414  m_words[wid] = &m_nodes[nid];
1415  }
1416  else
1417  {
1418  m_nodes[nid].children.reserve(m_k);
1419  }
1420  }
1421 
1422  return true;
1423 
1424 }
1425 
1426 // --------------------------------------------------------------------------
1427 
1428 template<class TDescriptor, class F>
1429 void TemplatedVocabulary<TDescriptor,F>::saveToTextFile(const std::string &filename) const
1430 {
1431  fstream f;
1432  f.open(filename.c_str(),ios_base::out);
1433  f << m_k << " " << m_L << " " << " " << m_scoring << " " << m_weighting << endl;
1434 
1435  for(size_t i=1; i<m_nodes.size();i++)
1436  {
1437  const Node& node = m_nodes[i];
1438 
1439  f << node.parent << " ";
1440  if(node.isLeaf())
1441  f << 1 << " ";
1442  else
1443  f << 0 << " ";
1444 
1445  f << F::toString(node.descriptor) << " " << (double)node.weight << endl;
1446  }
1447 
1448  f.close();
1449 }
1450 
1451 // --------------------------------------------------------------------------
1452 
1453 template<class TDescriptor, class F>
1454 void TemplatedVocabulary<TDescriptor,F>::save(const std::string &filename) const
1455 {
1456  cv::FileStorage fs(filename.c_str(), cv::FileStorage::WRITE);
1457  if(!fs.isOpened()) throw string("Could not open file ") + filename;
1458 
1459  save(fs);
1460 }
1461 
1462 // --------------------------------------------------------------------------
1463 
1464 template<class TDescriptor, class F>
1465 void TemplatedVocabulary<TDescriptor,F>::load(const std::string &filename)
1466 {
1467  cv::FileStorage fs(filename.c_str(), cv::FileStorage::READ);
1468  if(!fs.isOpened()) throw string("Could not open file ") + filename;
1469 
1470  this->load(fs);
1471 }
1472 
1473 // --------------------------------------------------------------------------
1474 
1475 template<class TDescriptor, class F>
1477  const std::string &name) const
1478 {
1479  // Format YAML:
1480  // vocabulary
1481  // {
1482  // k:
1483  // L:
1484  // scoringType:
1485  // weightingType:
1486  // nodes
1487  // [
1488  // {
1489  // nodeId:
1490  // parentId:
1491  // weight:
1492  // descriptor:
1493  // }
1494  // ]
1495  // words
1496  // [
1497  // {
1498  // wordId:
1499  // nodeId:
1500  // }
1501  // ]
1502  // }
1503  //
1504  // The root node (index 0) is not included in the node vector
1505  //
1506 
1507  f << name << "{";
1508 
1509  f << "k" << m_k;
1510  f << "L" << m_L;
1511  f << "scoringType" << m_scoring;
1512  f << "weightingType" << m_weighting;
1513 
1514  // tree
1515  f << "nodes" << "[";
1516  vector<NodeId> parents, children;
1517  vector<NodeId>::const_iterator pit;
1518 
1519  parents.push_back(0); // root
1520 
1521  while(!parents.empty())
1522  {
1523  NodeId pid = parents.back();
1524  parents.pop_back();
1525 
1526  const Node& parent = m_nodes[pid];
1527  children = parent.children;
1528 
1529  for(pit = children.begin(); pit != children.end(); pit++)
1530  {
1531  const Node& child = m_nodes[*pit];
1532 
1533  // save node data
1534  f << "{:";
1535  f << "nodeId" << (int)child.id;
1536  f << "parentId" << (int)pid;
1537  f << "weight" << (double)child.weight;
1538  f << "descriptor" << F::toString(child.descriptor);
1539  f << "}";
1540 
1541  // add to parent list
1542  if(!child.isLeaf())
1543  {
1544  parents.push_back(*pit);
1545  }
1546  }
1547  }
1548 
1549  f << "]"; // nodes
1550 
1551  // words
1552  f << "words" << "[";
1553 
1554  typename vector<Node*>::const_iterator wit;
1555  for(wit = m_words.begin(); wit != m_words.end(); wit++)
1556  {
1557  WordId id = wit - m_words.begin();
1558  f << "{:";
1559  f << "wordId" << (int)id;
1560  f << "nodeId" << (int)(*wit)->id;
1561  f << "}";
1562  }
1563 
1564  f << "]"; // words
1565 
1566  f << "}";
1567 
1568 }
1569 
1570 // --------------------------------------------------------------------------
1571 
1572 template<class TDescriptor, class F>
1573 void TemplatedVocabulary<TDescriptor,F>::load(const cv::FileStorage &fs,
1574  const std::string &name)
1575 {
1576  m_words.clear();
1577  m_nodes.clear();
1578 
1579  cv::FileNode fvoc = fs[name];
1580 
1581  m_k = (int)fvoc["k"];
1582  m_L = (int)fvoc["L"];
1583  m_scoring = (ScoringType)((int)fvoc["scoringType"]);
1584  m_weighting = (WeightingType)((int)fvoc["weightingType"]);
1585 
1587 
1588  // nodes
1589  cv::FileNode fn = fvoc["nodes"];
1590 
1591  m_nodes.resize(fn.size() + 1); // +1 to include root
1592  m_nodes[0].id = 0;
1593 
1594  for(unsigned int i = 0; i < fn.size(); ++i)
1595  {
1596  NodeId nid = (int)fn[i]["nodeId"];
1597  NodeId pid = (int)fn[i]["parentId"];
1598  WordValue weight = (WordValue)fn[i]["weight"];
1599  string d = (string)fn[i]["descriptor"];
1600 
1601  m_nodes[nid].id = nid;
1602  m_nodes[nid].parent = pid;
1603  m_nodes[nid].weight = weight;
1604  m_nodes[pid].children.push_back(nid);
1605 
1606  F::fromString(m_nodes[nid].descriptor, d);
1607  }
1608 
1609  // words
1610  fn = fvoc["words"];
1611 
1612  m_words.resize(fn.size());
1613 
1614  for(unsigned int i = 0; i < fn.size(); ++i)
1615  {
1616  NodeId wid = (int)fn[i]["wordId"];
1617  NodeId nid = (int)fn[i]["nodeId"];
1618 
1619  m_nodes[nid].word_id = wid;
1620  m_words[wid] = &m_nodes[nid];
1621  }
1622 }
1623 
1624 // --------------------------------------------------------------------------
1625 
1631 template<class TDescriptor, class F>
1632 std::ostream& operator<<(std::ostream &os,
1634 {
1635  os << "Vocabulary: k = " << voc.getBranchingFactor()
1636  << ", L = " << voc.getDepthLevels()
1637  << ", Weighting = ";
1638 
1639  switch(voc.getWeightingType())
1640  {
1641  case TF_IDF: os << "tf-idf"; break;
1642  case TF: os << "tf"; break;
1643  case IDF: os << "idf"; break;
1644  case BINARY: os << "binary"; break;
1645  }
1646 
1647  os << ", Scoring = ";
1648  switch(voc.getScoringType())
1649  {
1650  case L1_NORM: os << "L1-norm"; break;
1651  case L2_NORM: os << "L2-norm"; break;
1652  case CHI_SQUARE: os << "Chi square distance"; break;
1653  case KL: os << "KL-divergence"; break;
1654  case BHATTACHARYYA: os << "Bhattacharyya coefficient"; break;
1655  case DOT_PRODUCT: os << "Dot product"; break;
1656  }
1657 
1658  os << ", Number of words = " << voc.size();
1659 
1660  return os;
1661 }
1662 
1663 } // namespace DBoW2
1664 
1665 #endif
d
double score(const BowVector &a, const BowVector &b) const
virtual void initiateClusters(const vector< pDescriptor > &descriptors, vector< TDescriptor > &clusters) const
std::vector< Node > m_nodes
Tree nodes.
ScoringType getScoringType() const
void getFeatures(const vector< vector< TDescriptor > > &training_features, vector< pDescriptor > &features) const
filename
WeightingType
Weighting type.
Definition: BowVector.h:36
virtual TDescriptor getWord(WordId wid) const
WeightingType m_weighting
Weighting method.
static void SeedRandOnce()
Definition: Random.cpp:24
void load(const std::string &filename)
TDescriptor descriptor
Node descriptor.
WordValue weight
Weight if the node is a word.
f
ScoringType
Scoring type.
Definition: BowVector.h:45
void saveToTextFile(const std::string &filename) const
XmlRpcServer s
void getWordsFromNode(NodeId nid, std::vector< WordId > &words) const
virtual NodeId getParentNode(WordId wid, int levelsup) const
void addWeight(WordId id, WordValue v)
Definition: BowVector.cpp:34
void initiateClustersKMpp(const vector< pDescriptor > &descriptors, vector< TDescriptor > &clusters) const
GeneralScoring * m_scoring_object
Object for computing scores.
virtual double score(const BowVector &v, const BowVector &w) const =0
void HKmeansStep(NodeId parent_id, const vector< pDescriptor > &descriptors, int current_level)
virtual bool mustNormalize(LNorm &norm) const =0
void normalize(LNorm norm_type)
Definition: BowVector.cpp:62
vector< NodeId > children
Children.
virtual void transform(const std::vector< TDescriptor > &features, BowVector &v) const
Base class of scoring functions.
Definition: ScoringObject.h:18
NodeId parent
Parent node (undefined in case of root)
virtual unsigned int size() const
WeightingType getWeightingType() const
ScoringType m_scoring
Scoring method.
LNorm
L-norms for normalization.
Definition: BowVector.h:29
std::vector< Node * > m_words
bool loadFromTextFile(const std::string &filename)
Vector of words to represent images.
Definition: BowVector.h:56
double WordValue
Value of a word.
Definition: BowVector.h:23
Vector of nodes with indexes of local features.
Definition: FeatureVector.h:21
static int RandomInt(int min, int max)
Definition: Random.cpp:47
unsigned int WordId
Id of words.
Definition: BowVector.h:20
std::ostream & operator<<(std::ostream &out, const BowVector &v)
Definition: BowVector.cpp:88
TFSIMD_FORCE_INLINE const tfScalar & w() const
void addIfNotExist(WordId id, WordValue v)
Definition: BowVector.cpp:50
const TDescriptor * pDescriptor
Pointer to descriptor.
void save(const std::string &filename) const
WordId word_id
Word id if the node is a word.
virtual int stopWords(double minWeight)
virtual void create(const std::vector< std::vector< TDescriptor > > &training_features)
virtual WordValue getWordWeight(WordId wid) const
void setNodeWeights(const vector< vector< TDescriptor > > &features)
void addFeature(NodeId id, unsigned int i_feature)
unsigned int NodeId
Id of nodes in the vocabulary treee.
Definition: BowVector.h:26


orb_slam2_with_maps_odom
Author(s): teng zhang
autogenerated on Fri Sep 25 2020 03:24:47