TemplatedVocabulary.h
Go to the documentation of this file.
1 
17 #ifndef __D_T_TEMPLATED_VOCABULARY__
18 #define __D_T_TEMPLATED_VOCABULARY__
19 
20 #include <cassert>
21 
22 #include <vector>
23 #include <numeric>
24 #include <fstream>
25 #include <string>
26 #include <algorithm>
27 #include <opencv2/core/core.hpp>
28 #include <limits>
29 
30 #include "FeatureVector.h"
31 #include "BowVector.h"
32 #include "ScoringObject.h"
33 
34 #include "../DUtils/Random.h"
35 
36 using namespace std;
37 
38 namespace DBoW2 {
39 
42 template<class TDescriptor, class F>
45 {
46 public:
47 
55  TemplatedVocabulary(int k = 10, int L = 5,
56  WeightingType weighting = TF_IDF, ScoringType scoring = L1_NORM);
57 
62  TemplatedVocabulary(const std::string &filename);
63 
68  TemplatedVocabulary(const char *filename);
69 
75 
79  virtual ~TemplatedVocabulary();
80 
89 
95  virtual void create
96  (const std::vector<std::vector<TDescriptor> > &training_features);
97 
105  virtual void create
106  (const std::vector<std::vector<TDescriptor> > &training_features,
107  int k, int L);
108 
114  virtual void create
115  (const std::vector<std::vector<TDescriptor> > &training_features,
116  int k, int L, WeightingType weighting, ScoringType scoring);
117 
122  virtual inline unsigned int size() const;
123 
128  virtual inline bool empty() const;
129 
135  virtual void transform(const std::vector<TDescriptor>& features, BowVector &v)
136  const;
137 
145  virtual void transform(const std::vector<TDescriptor>& features,
146  BowVector &v, FeatureVector &fv, int levelsup) const;
147 
153  virtual WordId transform(const TDescriptor& feature) const;
154 
162  inline double score(const BowVector &a, const BowVector &b) const;
163 
171  virtual NodeId getParentNode(WordId wid, int levelsup) const;
172 
179  void getWordsFromNode(NodeId nid, std::vector<WordId> &words) const;
180 
185  inline int getBranchingFactor() const { return m_k; }
186 
191  inline int getDepthLevels() const { return m_L; }
192 
197  float getEffectiveLevels() const;
198 
204  virtual inline TDescriptor getWord(WordId wid) const;
205 
211  virtual inline WordValue getWordWeight(WordId wid) const;
212 
217  inline WeightingType getWeightingType() const { return m_weighting; }
218 
223  inline ScoringType getScoringType() const { return m_scoring; }
224 
229  inline void setWeightingType(WeightingType type);
230 
235  void setScoringType(ScoringType type);
236 
241  bool loadFromTextFile(const std::string &filename);
242 
247  void saveToTextFile(const std::string &filename) const;
248 
253  bool loadFromBinFile(const std::string &filename);
254 
255 
260  void saveToBinFile(const std::string &filename) const;
261 
266  void save(const std::string &filename) const;
267 
272  void load(const std::string &filename);
273 
278  virtual void save(cv::FileStorage &fs,
279  const std::string &name = "vocabulary") const;
280 
287  virtual void load(const cv::FileStorage &fs,
288  const std::string &name = "vocabulary");
289 
302  virtual int stopWords(double minWeight);
303 
304 protected:
305 
307  typedef const TDescriptor *pDescriptor;
308 
310  struct Node
311  {
317  vector<NodeId> children;
321  TDescriptor descriptor;
322 
325 
329  Node(): id(0), weight(0), parent(0), word_id(0){}
330 
335  Node(NodeId _id): id(_id), weight(0), parent(0), word_id(0){}
336 
341  inline bool isLeaf() const { return children.empty(); }
342  };
343 
344 protected:
345 
349  void createScoringObject();
350 
356  void getFeatures(
357  const vector<vector<TDescriptor> > &training_features,
358  vector<pDescriptor> &features) const;
359 
368  virtual void transform(const TDescriptor &feature,
369  WordId &id, WordValue &weight, NodeId* nid = NULL, int levelsup = 0) const;
370 
376  virtual void transform(const TDescriptor &feature, WordId &id) const;
377 
385  void HKmeansStep(NodeId parent_id, const vector<pDescriptor> &descriptors,
386  int current_level);
387 
393  virtual void initiateClusters(const vector<pDescriptor> &descriptors,
394  vector<TDescriptor> &clusters) const;
395 
402  void initiateClustersKMpp(const vector<pDescriptor> &descriptors,
403  vector<TDescriptor> &clusters) const;
404 
408  void createWords();
409 
416  void setNodeWeights(const vector<vector<TDescriptor> > &features);
417 
418 protected:
419 
421  int m_k;
422 
424  int m_L;
425 
428 
431 
434 
436  std::vector<Node> m_nodes;
437 
440  std::vector<Node*> m_words;
441 
442 };
443 
444 // --------------------------------------------------------------------------
445 
446 template<class TDescriptor, class F>
448  (int k, int L, WeightingType weighting, ScoringType scoring)
449  : m_k(k), m_L(L), m_weighting(weighting), m_scoring(scoring),
450  m_scoring_object(NULL)
451 {
452  createScoringObject();
453 }
454 
455 // --------------------------------------------------------------------------
456 
457 template<class TDescriptor, class F>
459  (const std::string &filename): m_scoring_object(NULL)
460 {
461  load(filename);
462 }
463 
464 // --------------------------------------------------------------------------
465 
466 template<class TDescriptor, class F>
468  (const char *filename): m_scoring_object(NULL)
469 {
470  load(filename);
471 }
472 
473 // --------------------------------------------------------------------------
474 
475 template<class TDescriptor, class F>
477 {
478  delete m_scoring_object;
479  m_scoring_object = NULL;
480 
481  switch(m_scoring)
482  {
483  case L1_NORM:
484  m_scoring_object = new L1Scoring;
485  break;
486 
487  case L2_NORM:
488  m_scoring_object = new L2Scoring;
489  break;
490 
491  case CHI_SQUARE:
492  m_scoring_object = new ChiSquareScoring;
493  break;
494 
495  case KL:
496  m_scoring_object = new KLScoring;
497  break;
498 
499  case BHATTACHARYYA:
500  m_scoring_object = new BhattacharyyaScoring;
501  break;
502 
503  case DOT_PRODUCT:
504  m_scoring_object = new DotProductScoring;
505  break;
506 
507  }
508 }
509 
510 // --------------------------------------------------------------------------
511 
512 template<class TDescriptor, class F>
514 {
515  m_scoring = type;
516  createScoringObject();
517 }
518 
519 // --------------------------------------------------------------------------
520 
521 template<class TDescriptor, class F>
523 {
524  this->m_weighting = type;
525 }
526 
527 // --------------------------------------------------------------------------
528 
529 template<class TDescriptor, class F>
532  : m_scoring_object(NULL)
533 {
534  *this = voc;
535 }
536 
537 // --------------------------------------------------------------------------
538 
539 template<class TDescriptor, class F>
541 {
542  delete m_scoring_object;
543 }
544 
545 // --------------------------------------------------------------------------
546 
547 template<class TDescriptor, class F>
551 {
552  this->m_k = voc.m_k;
553  this->m_L = voc.m_L;
554  this->m_scoring = voc.m_scoring;
555  this->m_weighting = voc.m_weighting;
556 
557  this->createScoringObject();
558 
559  this->m_nodes.clear();
560  this->m_words.clear();
561 
562  this->m_nodes = voc.m_nodes;
563  this->createWords();
564 
565  return *this;
566 }
567 
568 // --------------------------------------------------------------------------
569 
570 template<class TDescriptor, class F>
572  const std::vector<std::vector<TDescriptor> > &training_features)
573 {
574  m_nodes.clear();
575  m_words.clear();
576 
577  // expected_nodes = Sum_{i=0..L} ( k^i )
578  int expected_nodes =
579  (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
580 
581  m_nodes.reserve(expected_nodes); // avoid allocations when creating the tree
582 
583 
584  vector<pDescriptor> features;
585  getFeatures(training_features, features);
586 
587 
588  // create root
589  m_nodes.push_back(Node(0)); // root
590 
591  // create the tree
592  HKmeansStep(0, features, 1);
593 
594  // create the words
595  createWords();
596 
597  // and set the weight of each node of the tree
598  setNodeWeights(training_features);
599 
600 }
601 
602 // --------------------------------------------------------------------------
603 
604 template<class TDescriptor, class F>
606  const std::vector<std::vector<TDescriptor> > &training_features,
607  int k, int L)
608 {
609  m_k = k;
610  m_L = L;
611 
612  create(training_features);
613 }
614 
615 // --------------------------------------------------------------------------
616 
617 template<class TDescriptor, class F>
619  const std::vector<std::vector<TDescriptor> > &training_features,
620  int k, int L, WeightingType weighting, ScoringType scoring)
621 {
622  m_k = k;
623  m_L = L;
624  m_weighting = weighting;
625  m_scoring = scoring;
627 
628  create(training_features);
629 }
630 
631 // --------------------------------------------------------------------------
632 
633 template<class TDescriptor, class F>
635  const vector<vector<TDescriptor> > &training_features,
636  vector<pDescriptor> &features) const
637 {
638  features.resize(0);
639 
640  typename vector<vector<TDescriptor> >::const_iterator vvit;
641  typename vector<TDescriptor>::const_iterator vit;
642  for(vvit = training_features.begin(); vvit != training_features.end(); ++vvit)
643  {
644  features.reserve(features.size() + vvit->size());
645  for(vit = vvit->begin(); vit != vvit->end(); ++vit)
646  {
647  features.push_back(&(*vit));
648  }
649  }
650 }
651 
652 // --------------------------------------------------------------------------
653 
654 template<class TDescriptor, class F>
656  const vector<pDescriptor> &descriptors, int current_level)
657 {
658  if(descriptors.empty()) return;
659 
660  // features associated to each cluster
661  vector<TDescriptor> clusters;
662  vector<vector<unsigned int> > groups; // groups[i] = [j1, j2, ...]
663  // j1, j2, ... indices of descriptors associated to cluster i
664 
665  clusters.reserve(m_k);
666  groups.reserve(m_k);
667 
668  //const int msizes[] = { m_k, descriptors.size() };
669  //cv::SparseMat assoc(2, msizes, CV_8U);
670  //cv::SparseMat last_assoc(2, msizes, CV_8U);
672 
673  if((int)descriptors.size() <= m_k)
674  {
675  // trivial case: one cluster per feature
676  groups.resize(descriptors.size());
677 
678  for(unsigned int i = 0; i < descriptors.size(); i++)
679  {
680  groups[i].push_back(i);
681  clusters.push_back(*descriptors[i]);
682  }
683  }
684  else
685  {
686  // select clusters and groups with kmeans
687 
688  bool first_time = true;
689  bool goon = true;
690 
691  // to check if clusters move after iterations
692  vector<int> last_association, current_association;
693 
694  while(goon)
695  {
696  // 1. Calculate clusters
697 
698  if(first_time)
699  {
700  // random sample
701  initiateClusters(descriptors, clusters);
702  }
703  else
704  {
705  // calculate cluster centres
706 
707  for(unsigned int c = 0; c < clusters.size(); ++c)
708  {
709  vector<pDescriptor> cluster_descriptors;
710  cluster_descriptors.reserve(groups[c].size());
711 
712  /*
713  for(unsigned int d = 0; d < descriptors.size(); ++d)
714  {
715  if( assoc.find<unsigned char>(c, d) )
716  {
717  cluster_descriptors.push_back(descriptors[d]);
718  }
719  }
720  */
721 
722  vector<unsigned int>::const_iterator vit;
723  for(vit = groups[c].begin(); vit != groups[c].end(); ++vit)
724  {
725  cluster_descriptors.push_back(descriptors[*vit]);
726  }
727 
728 
729  F::meanValue(cluster_descriptors, clusters[c]);
730  }
731 
732  } // if(!first_time)
733 
734  // 2. Associate features with clusters
735 
736  // calculate distances to cluster centers
737  groups.clear();
738  groups.resize(clusters.size(), vector<unsigned int>());
739  current_association.resize(descriptors.size());
740 
741  //assoc.clear();
742 
743  typename vector<pDescriptor>::const_iterator fit;
744  //unsigned int d = 0;
745  for(fit = descriptors.begin(); fit != descriptors.end(); ++fit)//, ++d)
746  {
747  double best_dist = F::distance(*(*fit), clusters[0]);
748  unsigned int icluster = 0;
749 
750  for(unsigned int c = 1; c < clusters.size(); ++c)
751  {
752  double dist = F::distance(*(*fit), clusters[c]);
753  if(dist < best_dist)
754  {
755  best_dist = dist;
756  icluster = c;
757  }
758  }
759 
760  //assoc.ref<unsigned char>(icluster, d) = 1;
761 
762  groups[icluster].push_back(fit - descriptors.begin());
763  current_association[ fit - descriptors.begin() ] = icluster;
764  }
765 
766  // kmeans++ ensures all the clusters has any feature associated with them
767 
768  // 3. check convergence
769  if(first_time)
770  {
771  first_time = false;
772  }
773  else
774  {
775  //goon = !eqUChar(last_assoc, assoc);
776 
777  goon = false;
778  for(unsigned int i = 0; i < current_association.size(); i++)
779  {
780  if(current_association[i] != last_association[i]){
781  goon = true;
782  break;
783  }
784  }
785  }
786 
787  if(goon)
788  {
789  // copy last feature-cluster association
790  last_association = current_association;
791  //last_assoc = assoc.clone();
792  }
793 
794  } // while(goon)
795 
796  } // if must run kmeans
797 
798  // create nodes
799  for(unsigned int i = 0; i < clusters.size(); ++i)
800  {
801  NodeId id = m_nodes.size();
802  m_nodes.push_back(Node(id));
803  m_nodes.back().descriptor = clusters[i];
804  m_nodes.back().parent = parent_id;
805  m_nodes[parent_id].children.push_back(id);
806  }
807 
808  // go on with the next level
809  if(current_level < m_L)
810  {
811  // iterate again with the resulting clusters
812  const vector<NodeId> &children_ids = m_nodes[parent_id].children;
813  for(unsigned int i = 0; i < clusters.size(); ++i)
814  {
815  NodeId id = children_ids[i];
816 
817  vector<pDescriptor> child_features;
818  child_features.reserve(groups[i].size());
819 
820  vector<unsigned int>::const_iterator vit;
821  for(vit = groups[i].begin(); vit != groups[i].end(); ++vit)
822  {
823  child_features.push_back(descriptors[*vit]);
824  }
825 
826  if(child_features.size() > 1)
827  {
828  HKmeansStep(id, child_features, current_level + 1);
829  }
830  }
831  }
832 }
833 
834 // --------------------------------------------------------------------------
835 
836 template<class TDescriptor, class F>
838  (const vector<pDescriptor> &descriptors, vector<TDescriptor> &clusters) const
839 {
840  initiateClustersKMpp(descriptors, clusters);
841 }
842 
843 // --------------------------------------------------------------------------
844 
845 template<class TDescriptor, class F>
847  const vector<pDescriptor> &pfeatures, vector<TDescriptor> &clusters) const
848 {
849  // Implements kmeans++ seeding algorithm
850  // Algorithm:
851  // 1. Choose one center uniformly at random from among the data points.
852  // 2. For each data point x, compute D(x), the distance between x and the nearest
853  // center that has already been chosen.
854  // 3. Add one new data point as a center. Each point x is chosen with probability
855  // proportional to D(x)^2.
856  // 4. Repeat Steps 2 and 3 until k centers have been chosen.
857  // 5. Now that the initial centers have been chosen, proceed using standard k-means
858  // clustering.
859 
861 
862  clusters.resize(0);
863  clusters.reserve(m_k);
864  vector<double> min_dists(pfeatures.size(), std::numeric_limits<double>::max());
865 
866  // 1.
867 
868  int ifeature = DUtils::Random::RandomInt(0, pfeatures.size()-1);
869 
870  // create first cluster
871  clusters.push_back(*pfeatures[ifeature]);
872 
873  // compute the initial distances
874  typename vector<pDescriptor>::const_iterator fit;
875  vector<double>::iterator dit;
876  dit = min_dists.begin();
877  for(fit = pfeatures.begin(); fit != pfeatures.end(); ++fit, ++dit)
878  {
879  *dit = F::distance(*(*fit), clusters.back());
880  }
881 
882  while((int)clusters.size() < m_k)
883  {
884  // 2.
885  dit = min_dists.begin();
886  for(fit = pfeatures.begin(); fit != pfeatures.end(); ++fit, ++dit)
887  {
888  if(*dit > 0)
889  {
890  double dist = F::distance(*(*fit), clusters.back());
891  if(dist < *dit) *dit = dist;
892  }
893  }
894 
895  // 3.
896  double dist_sum = std::accumulate(min_dists.begin(), min_dists.end(), 0.0);
897 
898  if(dist_sum > 0)
899  {
900  double cut_d;
901  do
902  {
903  cut_d = DUtils::Random::RandomValue<double>(0, dist_sum);
904  } while(cut_d == 0.0);
905 
906  double d_up_now = 0;
907  for(dit = min_dists.begin(); dit != min_dists.end(); ++dit)
908  {
909  d_up_now += *dit;
910  if(d_up_now >= cut_d) break;
911  }
912 
913  if(dit == min_dists.end())
914  ifeature = pfeatures.size()-1;
915  else
916  ifeature = dit - min_dists.begin();
917 
918  clusters.push_back(*pfeatures[ifeature]);
919 
920  } // if dist_sum > 0
921  else
922  break;
923 
924  } // while(used_clusters < m_k)
925 
926 }
927 
928 // --------------------------------------------------------------------------
929 
930 template<class TDescriptor, class F>
932 {
933  m_words.resize(0);
934 
935  if(!m_nodes.empty())
936  {
937  m_words.reserve( (int)pow((double)m_k, (double)m_L) );
938 
939  typename vector<Node>::iterator nit;
940 
941  nit = m_nodes.begin(); // ignore root
942  for(++nit; nit != m_nodes.end(); ++nit)
943  {
944  if(nit->isLeaf())
945  {
946  nit->word_id = m_words.size();
947  m_words.push_back( &(*nit) );
948  }
949  }
950  }
951 }
952 
953 // --------------------------------------------------------------------------
954 
955 template<class TDescriptor, class F>
957  (const vector<vector<TDescriptor> > &training_features)
958 {
959  const unsigned int NWords = m_words.size();
960  const unsigned int NDocs = training_features.size();
961 
962  if(m_weighting == TF || m_weighting == BINARY)
963  {
964  // idf part must be 1 always
965  for(unsigned int i = 0; i < NWords; i++)
966  m_words[i]->weight = 1;
967  }
968  else if(m_weighting == IDF || m_weighting == TF_IDF)
969  {
970  // IDF and TF-IDF: we calculte the idf path now
971 
972  // Note: this actually calculates the idf part of the tf-idf score.
973  // The complete tf-idf score is calculated in ::transform
974 
975  vector<unsigned int> Ni(NWords, 0);
976  vector<bool> counted(NWords, false);
977 
978  typename vector<vector<TDescriptor> >::const_iterator mit;
979  typename vector<TDescriptor>::const_iterator fit;
980 
981  for(mit = training_features.begin(); mit != training_features.end(); ++mit)
982  {
983  fill(counted.begin(), counted.end(), false);
984 
985  for(fit = mit->begin(); fit < mit->end(); ++fit)
986  {
987  WordId word_id;
988  transform(*fit, word_id);
989 
990  if(!counted[word_id])
991  {
992  Ni[word_id]++;
993  counted[word_id] = true;
994  }
995  }
996  }
997 
998  // set ln(N/Ni)
999  for(unsigned int i = 0; i < NWords; i++)
1000  {
1001  if(Ni[i] > 0)
1002  {
1003  m_words[i]->weight = log((double)NDocs / (double)Ni[i]);
1004  }// else // This cannot occur if using kmeans++
1005  }
1006 
1007  }
1008 
1009 }
1010 
1011 // --------------------------------------------------------------------------
1012 
1013 template<class TDescriptor, class F>
1014 inline unsigned int TemplatedVocabulary<TDescriptor,F>::size() const
1015 {
1016  return m_words.size();
1017 }
1018 
1019 // --------------------------------------------------------------------------
1020 
1021 template<class TDescriptor, class F>
1023 {
1024  return m_words.empty();
1025 }
1026 
1027 // --------------------------------------------------------------------------
1028 
1029 template<class TDescriptor, class F>
1031 {
1032  long sum = 0;
1033  typename std::vector<Node*>::const_iterator wit;
1034  for(wit = m_words.begin(); wit != m_words.end(); ++wit)
1035  {
1036  const Node *p = *wit;
1037 
1038  for(; p->id != 0; sum++) p = &m_nodes[p->parent];
1039  }
1040 
1041  return (float)((double)sum / (double)m_words.size());
1042 }
1043 
1044 // --------------------------------------------------------------------------
1045 
1046 template<class TDescriptor, class F>
1048 {
1049  return m_words[wid]->descriptor;
1050 }
1051 
1052 // --------------------------------------------------------------------------
1053 
1054 template<class TDescriptor, class F>
1056 {
1057  return m_words[wid]->weight;
1058 }
1059 
1060 // --------------------------------------------------------------------------
1061 
1062 template<class TDescriptor, class F>
1064  (const TDescriptor& feature) const
1065 {
1066  if(empty())
1067  {
1068  return 0;
1069  }
1070 
1071  WordId wid;
1072  transform(feature, wid);
1073  return wid;
1074 }
1075 
1076 // --------------------------------------------------------------------------
1077 
1078 template<class TDescriptor, class F>
1080  const std::vector<TDescriptor>& features, BowVector &v) const
1081 {
1082  v.clear();
1083 
1084  if(empty())
1085  {
1086  return;
1087  }
1088 
1089  // normalize
1090  LNorm norm;
1091  bool must = m_scoring_object->mustNormalize(norm);
1092 
1093  typename vector<TDescriptor>::const_iterator fit;
1094 
1095  if(m_weighting == TF || m_weighting == TF_IDF)
1096  {
1097  for(fit = features.begin(); fit < features.end(); ++fit)
1098  {
1099  WordId id;
1100  WordValue w;
1101  // w is the idf value if TF_IDF, 1 if TF
1102 
1103  transform(*fit, id, w);
1104 
1105  // not stopped
1106  if(w > 0) v.addWeight(id, w);
1107  }
1108 
1109  if(!v.empty() && !must)
1110  {
1111  // unnecessary when normalizing
1112  const double nd = v.size();
1113  for(BowVector::iterator vit = v.begin(); vit != v.end(); vit++)
1114  vit->second /= nd;
1115  }
1116 
1117  }
1118  else // IDF || BINARY
1119  {
1120  for(fit = features.begin(); fit < features.end(); ++fit)
1121  {
1122  WordId id;
1123  WordValue w;
1124  // w is idf if IDF, or 1 if BINARY
1125 
1126  transform(*fit, id, w);
1127 
1128  // not stopped
1129  if(w > 0) v.addIfNotExist(id, w);
1130 
1131  } // if add_features
1132  } // if m_weighting == ...
1133 
1134  if(must) v.normalize(norm);
1135 }
1136 
1137 // --------------------------------------------------------------------------
1138 
1139 template<class TDescriptor, class F>
1141  const std::vector<TDescriptor>& features,
1142  BowVector &v, FeatureVector &fv, int levelsup) const
1143 {
1144  v.clear();
1145  fv.clear();
1146 
1147  if(empty()) // safe for subclasses
1148  {
1149  return;
1150  }
1151 
1152  // normalize
1153  LNorm norm;
1154  bool must = m_scoring_object->mustNormalize(norm);
1155 
1156  typename vector<TDescriptor>::const_iterator fit;
1157 
1158  if(m_weighting == TF || m_weighting == TF_IDF)
1159  {
1160  unsigned int i_feature = 0;
1161  for(fit = features.begin(); fit < features.end(); ++fit, ++i_feature)
1162  {
1163  WordId id;
1164  NodeId nid;
1165  WordValue w;
1166  // w is the idf value if TF_IDF, 1 if TF
1167 
1168  transform(*fit, id, w, &nid, levelsup);
1169 
1170  if(w > 0) // not stopped
1171  {
1172  v.addWeight(id, w);
1173  fv.addFeature(nid, i_feature);
1174  }
1175  }
1176 
1177  if(!v.empty() && !must)
1178  {
1179  // unnecessary when normalizing
1180  const double nd = v.size();
1181  for(BowVector::iterator vit = v.begin(); vit != v.end(); vit++)
1182  vit->second /= nd;
1183  }
1184 
1185  }
1186  else // IDF || BINARY
1187  {
1188  unsigned int i_feature = 0;
1189  for(fit = features.begin(); fit < features.end(); ++fit, ++i_feature)
1190  {
1191  WordId id;
1192  NodeId nid;
1193  WordValue w;
1194  // w is idf if IDF, or 1 if BINARY
1195 
1196  transform(*fit, id, w, &nid, levelsup);
1197 
1198  if(w > 0) // not stopped
1199  {
1200  v.addIfNotExist(id, w);
1201  fv.addFeature(nid, i_feature);
1202  }
1203  }
1204  } // if m_weighting == ...
1205 
1206  if(must) v.normalize(norm);
1207 }
1208 
1209 // --------------------------------------------------------------------------
1210 
1211 template<class TDescriptor, class F>
1213  (const BowVector &v1, const BowVector &v2) const
1214 {
1215  return m_scoring_object->score(v1, v2);
1216 }
1217 
1218 // --------------------------------------------------------------------------
1219 
1220 template<class TDescriptor, class F>
1222  (const TDescriptor &feature, WordId &id) const
1223 {
1224  WordValue weight;
1225  transform(feature, id, weight);
1226 }
1227 
1228 // --------------------------------------------------------------------------
1229 
1230 template<class TDescriptor, class F>
1231 void TemplatedVocabulary<TDescriptor,F>::transform(const TDescriptor &feature,
1232  WordId &word_id, WordValue &weight, NodeId *nid, int levelsup) const
1233 {
1234  // propagate the feature down the tree
1235  vector<NodeId> nodes;
1236  typename vector<NodeId>::const_iterator nit;
1237 
1238  // level at which the node must be stored in nid, if given
1239  const int nid_level = m_L - levelsup;
1240  if(nid_level <= 0 && nid != NULL) *nid = 0; // root
1241 
1242  NodeId final_id = 0; // root
1243  int current_level = 0;
1244 
1245  do
1246  {
1247  ++current_level;
1248  nodes = m_nodes[final_id].children;
1249  final_id = nodes[0];
1250 
1251  double best_d = F::distance(feature, m_nodes[final_id].descriptor);
1252 
1253  for(nit = nodes.begin() + 1; nit != nodes.end(); ++nit)
1254  {
1255  NodeId id = *nit;
1256  double d = F::distance(feature, m_nodes[id].descriptor);
1257  if(d < best_d)
1258  {
1259  best_d = d;
1260  final_id = id;
1261  }
1262  }
1263 
1264  if(nid != NULL && current_level == nid_level)
1265  *nid = final_id;
1266 
1267  } while( !m_nodes[final_id].isLeaf() );
1268 
1269  // turn node id into word id
1270  word_id = m_nodes[final_id].word_id;
1271  weight = m_nodes[final_id].weight;
1272 }
1273 
1274 // --------------------------------------------------------------------------
1275 
1276 template<class TDescriptor, class F>
1278  (WordId wid, int levelsup) const
1279 {
1280  NodeId ret = m_words[wid]->id; // node id
1281  while(levelsup > 0 && ret != 0) // ret == 0 --> root
1282  {
1283  --levelsup;
1284  ret = m_nodes[ret].parent;
1285  }
1286  return ret;
1287 }
1288 
1289 // --------------------------------------------------------------------------
1290 
1291 template<class TDescriptor, class F>
1293  (NodeId nid, std::vector<WordId> &words) const
1294 {
1295  words.clear();
1296 
1297  if(m_nodes[nid].isLeaf())
1298  {
1299  words.push_back(m_nodes[nid].word_id);
1300  }
1301  else
1302  {
1303  words.reserve(m_k); // ^1, ^2, ...
1304 
1305  vector<NodeId> parents;
1306  parents.push_back(nid);
1307 
1308  while(!parents.empty())
1309  {
1310  NodeId parentid = parents.back();
1311  parents.pop_back();
1312 
1313  const vector<NodeId> &child_ids = m_nodes[parentid].children;
1314  vector<NodeId>::const_iterator cit;
1315 
1316  for(cit = child_ids.begin(); cit != child_ids.end(); ++cit)
1317  {
1318  const Node &child_node = m_nodes[*cit];
1319 
1320  if(child_node.isLeaf())
1321  words.push_back(child_node.word_id);
1322  else
1323  parents.push_back(*cit);
1324 
1325  } // for each child
1326  } // while !parents.empty
1327  }
1328 }
1329 
1330 // --------------------------------------------------------------------------
1331 
1332 template<class TDescriptor, class F>
1334 {
1335  int c = 0;
1336  typename vector<Node*>::iterator wit;
1337  for(wit = m_words.begin(); wit != m_words.end(); ++wit)
1338  {
1339  if((*wit)->weight < minWeight)
1340  {
1341  ++c;
1342  (*wit)->weight = 0;
1343  }
1344  }
1345  return c;
1346 }
1347 
1348 // --------------------------------------------------------------------------
1349 
1350 template<class TDescriptor, class F>
1352 {
1353  ifstream f;
1354  f.open(filename.c_str());
1355 
1356  if(f.eof()) {
1357  return false;
1358  }
1359 
1360  if(f.fail()) {
1361  std::cerr << "An error occured while opening the input stream." << endl;
1362  return false;
1363  }
1364 
1365  m_words.clear();
1366  m_nodes.clear();
1367 
1368  string s;
1369  getline(f,s);
1370  stringstream ss;
1371  ss << s;
1372  ss >> m_k;
1373  ss >> m_L;
1374  int n1, n2;
1375  ss >> n1;
1376  ss >> n2;
1377 
1378  if(m_k<0 || m_k>20 || m_L<1 || m_L>10 || n1<0 || n1>5 || n2<0 || n2>3)
1379  {
1380  std::cerr << "Vocabulary loading failure: This is not a correct text file!" << endl;
1381  return false;
1382  }
1383 
1384  m_scoring = (ScoringType)n1;
1385  m_weighting = (WeightingType)n2;
1387 
1388  // nodes
1389  int expected_nodes =
1390  (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
1391  m_nodes.reserve(expected_nodes);
1392 
1393  m_words.reserve(pow((double)m_k, (double)m_L + 1));
1394 
1395  m_nodes.resize(1);
1396  m_nodes[0].id = 0;
1397  while(!f.eof())
1398  {
1399  string snode;
1400  getline(f,snode);
1401  stringstream ssnode;
1402  ssnode << snode;
1403 
1404  int nid = m_nodes.size();
1405  m_nodes.resize(m_nodes.size()+1);
1406  m_nodes[nid].id = nid;
1407 
1408  int pid ;
1409  ssnode >> pid;
1410  m_nodes[nid].parent = pid;
1411  m_nodes[pid].children.push_back(nid);
1412 
1413  int nIsLeaf;
1414  ssnode >> nIsLeaf;
1415 
1416  stringstream ssd;
1417  for(int iD=0;iD<F::L;iD++)
1418  {
1419  string sElement;
1420  ssnode >> sElement;
1421  ssd << sElement << " ";
1422  }
1423  F::fromString(m_nodes[nid].descriptor, ssd.str());
1424 
1425  ssnode >> m_nodes[nid].weight;
1426 
1427  if(nIsLeaf>0)
1428  {
1429  int wid = m_words.size();
1430  m_words.resize(wid+1);
1431 
1432  m_nodes[nid].word_id = wid;
1433  m_words[wid] = &m_nodes[nid];
1434  }
1435  else
1436  {
1437  m_nodes[nid].children.reserve(m_k);
1438  }
1439  }
1440 
1441  return true;
1442 
1443 }
1444 
1445 // --------------------------------------------------------------------------
1446 
1447 template<class TDescriptor, class F>
1448 void TemplatedVocabulary<TDescriptor,F>::saveToTextFile(const std::string &filename) const
1449 {
1450  fstream f;
1451  f.open(filename.c_str(),ios_base::out);
1452  f << m_k << " " << m_L << " " << " " << m_scoring << " " << m_weighting << endl;
1453  for(size_t i=1; i<m_nodes.size()-1;i++) //changed by B. Vandeportaele from for(size_t i=1; i<m_nodes.size();i++) because the generated file was too long
1454  {
1455  const Node& node = m_nodes[i];
1456 
1457  f << node.parent << " ";
1458  if(node.isLeaf())
1459  f << 1 << " ";
1460  else
1461  f << 0 << " ";
1462 
1463  f << F::toString(node.descriptor) << " " << (double)node.weight << endl;
1464  }
1465 
1466  f.close();
1467 }
1468 
1469 // --------------------------------------------------------------------------
1470 
1471 //TODO: the file is never closed?
1472 template<class TDescriptor, class F>
1474 {
1475  ifstream f;
1476  f.open(filename.c_str());
1477 
1478  if(f.eof()) {
1479  return false;
1480  }
1481 
1482  if(f.fail()) {
1483  std::cerr << "An error occured while opening the input stream." << endl;
1484  return false;
1485  }
1486 
1487  m_words.clear();
1488  m_nodes.clear();
1489 
1490  f.read((char*)&m_k,sizeof(m_k));
1491  f.read((char*)&m_L,sizeof(m_L));
1492  int n1, n2;
1493  f.read((char*)&n1,sizeof(n1));
1494  f.read((char*)&n2,sizeof(n2));
1495 
1496  if(m_k<0 || m_k>20 || m_L<1 || m_L>10 || n1<0 || n1>5 || n2<0 || n2>3)
1497  {
1498  std::cerr << "Vocabulary loading failure: This is not a correct Binary file!" << endl;
1499  return false;
1500  }
1501 
1502  m_scoring = (ScoringType)n1;
1503  m_weighting = (WeightingType)n2;
1505 
1506  // nodes
1507  int expected_nodes =
1508  (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
1509  m_nodes.reserve(expected_nodes);
1510  m_words.reserve(pow((double)m_k, (double)m_L + 1));
1511  m_nodes.resize(1);
1512  m_nodes[0].id = 0;
1513  // while(!f.eof())
1514  while((!f.eof()) && ( m_nodes.size()<(unsigned int)expected_nodes) )
1515  {
1516  int nid = m_nodes.size();
1517  m_nodes.resize(m_nodes.size()+1);
1518  m_nodes[nid].id = nid;
1519  int pid ;
1520  f.read((char*)&pid,sizeof(pid));
1521  m_nodes[nid].parent = pid;
1522  m_nodes[pid].children.push_back(nid);
1523  int nIsLeaf;
1524  unsigned char nIsLeafuc;
1525  f.read((char*)&nIsLeafuc,sizeof(nIsLeafuc));
1526  nIsLeaf=nIsLeafuc;
1527  unsigned char array[F::L]; // the number of element is stored in F::L
1528  f.read((char*)array,(long)F::L);
1529  m_nodes[nid].descriptor.create(1, F::L, CV_8U);
1530  F::fromArray8U(m_nodes[nid].descriptor,(unsigned char *)array);
1531  f.read((char*)&m_nodes[nid].weight,sizeof(m_nodes[nid].weight));
1532  if(nIsLeaf>0)
1533  {
1534  int wid = m_words.size();
1535  m_words.resize(wid+1);
1536  m_nodes[nid].word_id = wid;
1537  m_words[wid] = &m_nodes[nid];
1538  }
1539  else
1540  {
1541  m_nodes[nid].children.reserve(m_k);
1542  }
1543  }
1544 
1545  return true;
1546 
1547 }
1548 // --------------------------------------------------------------------------
1549 
1550 template<class TDescriptor, class F>
1551 void TemplatedVocabulary<TDescriptor,F>::saveToBinFile(const std::string &filename) const
1552 {
1553  fstream f;
1554  f.open(filename.c_str(),ios_base::out);
1555  f.write((const char*)&m_k,sizeof(m_k));
1556  f.write((const char*)&m_L,sizeof(m_L));
1557  f.write((const char*)&m_scoring,sizeof(m_scoring));
1558  f.write((const char*)&m_weighting,sizeof(m_weighting));
1559  unsigned int nbiter=m_nodes.size();
1560  // nbiter=2; //for test
1561  unsigned char one=1;
1562  unsigned char zero=0;
1563  // unsigned char tab[10]={1,2,4,6,10,100,200,201,202,255}; f.write((const char*)tab,10); //for test
1564  for(size_t i=1; i<nbiter-1;i++)
1565  {
1566  const Node& node = m_nodes[i];
1567  f.write((const char*)& node.parent ,sizeof( node.parent ));
1568  if(node.isLeaf())
1569  f.write((const char*)& one ,sizeof( one ));
1570  else
1571  f.write((const char*)& zero ,sizeof( zero ));
1572  unsigned char array[F::L]; // the number of elements is stored in F::L
1573  F::toArray8U(node.descriptor,(unsigned char *)array);
1574  f.write((char*)array,(long)F::L);
1575  f.write((const char*)& node.weight ,sizeof( node.weight ));
1576  }
1577  f.close();
1578 }
1579 
1580 // --------------------------------------------------------------------------
1581 
1582 template<class TDescriptor, class F>
1583 void TemplatedVocabulary<TDescriptor,F>::save(const std::string &filename) const
1584 {
1585  cv::FileStorage fs(filename.c_str(), cv::FileStorage::WRITE);
1586  if(!fs.isOpened()) throw string("Could not open file ") + filename;
1587 
1588  save(fs);
1589 }
1590 
1591 // --------------------------------------------------------------------------
1592 
1593 template<class TDescriptor, class F>
1594 void TemplatedVocabulary<TDescriptor,F>::load(const std::string &filename)
1595 {
1596  cv::FileStorage fs(filename.c_str(), cv::FileStorage::READ);
1597  if(!fs.isOpened()) throw string("Could not open file ") + filename;
1598 
1599  this->load(fs);
1600 }
1601 
1602 // --------------------------------------------------------------------------
1603 
1604 template<class TDescriptor, class F>
1606  const std::string &name) const
1607 {
1608  // Format YAML:
1609  // vocabulary
1610  // {
1611  // k:
1612  // L:
1613  // scoringType:
1614  // weightingType:
1615  // nodes
1616  // [
1617  // {
1618  // nodeId:
1619  // parentId:
1620  // weight:
1621  // descriptor:
1622  // }
1623  // ]
1624  // words
1625  // [
1626  // {
1627  // wordId:
1628  // nodeId:
1629  // }
1630  // ]
1631  // }
1632  //
1633  // The root node (index 0) is not included in the node vector
1634  //
1635 
1636  f << name << "{";
1637 
1638  f << "k" << m_k;
1639  f << "L" << m_L;
1640  f << "scoringType" << m_scoring;
1641  f << "weightingType" << m_weighting;
1642 
1643  // tree
1644  f << "nodes" << "[";
1645  vector<NodeId> parents, children;
1646  vector<NodeId>::const_iterator pit;
1647 
1648  parents.push_back(0); // root
1649 
1650  while(!parents.empty())
1651  {
1652  NodeId pid = parents.back();
1653  parents.pop_back();
1654 
1655  const Node& parent = m_nodes[pid];
1656  children = parent.children;
1657 
1658  for(pit = children.begin(); pit != children.end(); pit++)
1659  {
1660  const Node& child = m_nodes[*pit];
1661 
1662  // save node data
1663  f << "{:";
1664  f << "nodeId" << (int)child.id;
1665  f << "parentId" << (int)pid;
1666  f << "weight" << (double)child.weight;
1667  f << "descriptor" << F::toString(child.descriptor);
1668  f << "}";
1669 
1670  // add to parent list
1671  if(!child.isLeaf())
1672  {
1673  parents.push_back(*pit);
1674  }
1675  }
1676  }
1677 
1678  f << "]"; // nodes
1679 
1680  // words
1681  f << "words" << "[";
1682 
1683  typename vector<Node*>::const_iterator wit;
1684  for(wit = m_words.begin(); wit != m_words.end(); wit++)
1685  {
1686  WordId id = wit - m_words.begin();
1687  f << "{:";
1688  f << "wordId" << (int)id;
1689  f << "nodeId" << (int)(*wit)->id;
1690  f << "}";
1691  }
1692 
1693  f << "]"; // words
1694 
1695  f << "}";
1696 
1697 }
1698 
1699 // --------------------------------------------------------------------------
1700 
1701 template<class TDescriptor, class F>
1702 void TemplatedVocabulary<TDescriptor,F>::load(const cv::FileStorage &fs,
1703  const std::string &name)
1704 {
1705  m_words.clear();
1706  m_nodes.clear();
1707 
1708  cv::FileNode fvoc = fs[name];
1709 
1710  m_k = (int)fvoc["k"];
1711  m_L = (int)fvoc["L"];
1712  m_scoring = (ScoringType)((int)fvoc["scoringType"]);
1713  m_weighting = (WeightingType)((int)fvoc["weightingType"]);
1714 
1716 
1717  // nodes
1718  cv::FileNode fn = fvoc["nodes"];
1719 
1720  m_nodes.resize(fn.size() + 1); // +1 to include root
1721  m_nodes[0].id = 0;
1722 
1723  for(unsigned int i = 0; i < fn.size(); ++i)
1724  {
1725  NodeId nid = (int)fn[i]["nodeId"];
1726  NodeId pid = (int)fn[i]["parentId"];
1727  WordValue weight = (WordValue)fn[i]["weight"];
1728  string d = (string)fn[i]["descriptor"];
1729 
1730  m_nodes[nid].id = nid;
1731  m_nodes[nid].parent = pid;
1732  m_nodes[nid].weight = weight;
1733  m_nodes[pid].children.push_back(nid);
1734 
1735  F::fromString(m_nodes[nid].descriptor, d);
1736  }
1737 
1738  // words
1739  fn = fvoc["words"];
1740 
1741  m_words.resize(fn.size());
1742 
1743  for(unsigned int i = 0; i < fn.size(); ++i)
1744  {
1745  NodeId wid = (int)fn[i]["wordId"];
1746  NodeId nid = (int)fn[i]["nodeId"];
1747 
1748  m_nodes[nid].word_id = wid;
1749  m_words[wid] = &m_nodes[nid];
1750  }
1751 }
1752 
1753 // --------------------------------------------------------------------------
1754 
1760 template<class TDescriptor, class F>
1761 std::ostream& operator<<(std::ostream &os,
1763 {
1764  os << "Vocabulary: k = " << voc.getBranchingFactor()
1765  << ", L = " << voc.getDepthLevels()
1766  << ", Weighting = ";
1767 
1768  switch(voc.getWeightingType())
1769  {
1770  case TF_IDF: os << "tf-idf"; break;
1771  case TF: os << "tf"; break;
1772  case IDF: os << "idf"; break;
1773  case BINARY: os << "binary"; break;
1774  }
1775 
1776  os << ", Scoring = ";
1777  switch(voc.getScoringType())
1778  {
1779  case L1_NORM: os << "L1-norm"; break;
1780  case L2_NORM: os << "L2-norm"; break;
1781  case CHI_SQUARE: os << "Chi square distance"; break;
1782  case KL: os << "KL-divergence"; break;
1783  case BHATTACHARYYA: os << "Bhattacharyya coefficient"; break;
1784  case DOT_PRODUCT: os << "Dot product"; break;
1785  }
1786 
1787  os << ", Number of words = " << voc.size();
1788 
1789  return os;
1790 }
1791 
1792 } // namespace DBoW2
1793 
1794 #endif
d
double score(const BowVector &a, const BowVector &b) const
virtual void initiateClusters(const vector< pDescriptor > &descriptors, vector< TDescriptor > &clusters) const
std::vector< Node > m_nodes
Tree nodes.
ScoringType getScoringType() const
void getFeatures(const vector< vector< TDescriptor > > &training_features, vector< pDescriptor > &features) const
void saveToBinFile(const std::string &filename) const
filename
WeightingType
Weighting type.
Definition: BowVector.h:36
virtual TDescriptor getWord(WordId wid) const
WeightingType m_weighting
Weighting method.
static void SeedRandOnce()
Definition: Random.cpp:24
void load(const std::string &filename)
TDescriptor descriptor
Node descriptor.
void load(Archive &ar,::cv::Mat &m, const unsigned int version)
Definition: BoostArchiver.h:77
WordValue weight
Weight if the node is a word.
f
ScoringType
Scoring type.
Definition: BowVector.h:45
void saveToTextFile(const std::string &filename) const
XmlRpcServer s
void getWordsFromNode(NodeId nid, std::vector< WordId > &words) const
virtual NodeId getParentNode(WordId wid, int levelsup) const
void addWeight(WordId id, WordValue v)
Definition: BowVector.cpp:34
void initiateClustersKMpp(const vector< pDescriptor > &descriptors, vector< TDescriptor > &clusters) const
GeneralScoring * m_scoring_object
Object for computing scores.
virtual double score(const BowVector &v, const BowVector &w) const =0
void HKmeansStep(NodeId parent_id, const vector< pDescriptor > &descriptors, int current_level)
virtual bool mustNormalize(LNorm &norm) const =0
void normalize(LNorm norm_type)
Definition: BowVector.cpp:62
vector< NodeId > children
Children.
virtual void transform(const std::vector< TDescriptor > &features, BowVector &v) const
Base class of scoring functions.
Definition: ScoringObject.h:18
NodeId parent
Parent node (undefined in case of root)
virtual unsigned int size() const
WeightingType getWeightingType() const
ScoringType m_scoring
Scoring method.
LNorm
L-norms for normalization.
Definition: BowVector.h:29
std::vector< Node * > m_words
bool loadFromTextFile(const std::string &filename)
Vector of words to represent images.
Definition: BowVector.h:56
double WordValue
Value of a word.
Definition: BowVector.h:23
Vector of nodes with indexes of local features.
Definition: FeatureVector.h:21
static int RandomInt(int min, int max)
Definition: Random.cpp:47
unsigned int WordId
Id of words.
Definition: BowVector.h:20
std::ostream & operator<<(std::ostream &out, const BowVector &v)
Definition: BowVector.cpp:88
TFSIMD_FORCE_INLINE const tfScalar & w() const
void addIfNotExist(WordId id, WordValue v)
Definition: BowVector.cpp:50
const TDescriptor * pDescriptor
Pointer to descriptor.
void save(const std::string &filename) const
bool loadFromBinFile(const std::string &filename)
void save(Archive &ar, const ::cv::Mat &m, const unsigned int file_version)
Definition: BoostArchiver.h:60
WordId word_id
Word id if the node is a word.
virtual int stopWords(double minWeight)
virtual void create(const std::vector< std::vector< TDescriptor > > &training_features)
virtual WordValue getWordWeight(WordId wid) const
void setNodeWeights(const vector< vector< TDescriptor > > &features)
void addFeature(NodeId id, unsigned int i_feature)
unsigned int NodeId
Id of nodes in the vocabulary treee.
Definition: BowVector.h:26


orb_slam2_ros
Author(s):
autogenerated on Wed Apr 21 2021 02:53:05