FernImageDetector.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of ALVAR, A Library for Virtual and Augmented Reality.
3  *
4  * Copyright 2007-2012 VTT Technical Research Centre of Finland
5  *
6  * Contact: VTT Augmented Reality Team <alvar.info@vtt.fi>
7  * <http://www.vtt.fi/multimedia/alvar.html>
8  *
9  * ALVAR is free software; you can redistribute it and/or modify it under the
10  * terms of the GNU Lesser General Public License as published by the Free
11  * Software Foundation; either version 2.1 of the License, or (at your option)
12  * any later version.
13  *
14  * This library is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
17  * for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with ALVAR; if not, see
21  * <http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html>.
22  */
23 
24 #include "FernImageDetector.h"
25 
26 namespace alvar
27 {
28 
// Tuning constants for the Fern detector. These are file-local by intent but,
// being macros, are not confined to the alvar namespace.

// Passed when constructing mLDetector and when calling trainFromSingleView().
29 #define PATCH_SIZE 31
// Passed when constructing mLDetector.
30 #define PYR_LEVELS 1
// Passed when constructing mLDetector and when calling trainFromSingleView().
31 #define N_VIEWS 5000
// Maximum number of keypoints requested from mLDetector in findFeatures().
32 #define N_PTS_TO_FIND 400
// Number of keypoints requested from getMostStable2D() in train().
33 #define N_PTS_TO_TEACH 200
// Gaussian blur kernel side length used in train() and findFeatures().
34 #define SIZE_BLUR 13
35 
// Passed to trainFromSingleView() in train().
36 #define N_STRUCTS 50
// Passed to trainFromSingleView() in train().
37 #define STRUCT_SIZE 11
// NOTE(review): no use of SIGNATURE_SIZE is visible in this listing; presumably
// it is the signature-size argument of trainFromSingleView() - confirm.
38 #define SIGNATURE_SIZE 400
39 
40 // default opencv parameters
41 // PATCH_SIZE = 31,
42 // DEFAULT_STRUCTS = 50,
43 // DEFAULT_STRUCT_SIZE = 9,
44 // DEFAULT_VIEWS = 5000,
45 // DEFAULT_SIGNATURE_SIZE = 176,
46 // COMPRESSION_NONE = 0,
47 // COMPRESSION_RANDOM_PROJ = 1,
48 // COMPRESSION_PCA = 2,
49 // DEFAULT_COMPRESSION_METHOD = COMPRESSION_NONE
50 
51 // PatchGenerator(double _backgroundMin, double _backgroundMax,
52 // double _noiseRange, bool _randomBlur=true,
53 // double _lambdaMin=0.6, double _lambdaMax=1.5,
54 // double _thetaMin=-CV_PI, double _thetaMax=CV_PI,
55 // double _phiMin=-CV_PI, double _phiMax=CV_PI );
56 
57 // Calculate random parameterized affine transformation A
58 // A = T(patch center) * R(theta) * R(phi)' * S(lambda1, lambda2) * R(phi) * T(-pt)
59 
61  : FernClassifier()
62 {
63 }
64 
66  : FernClassifier(fileNode)
67 {
68 }
69 
70 FernClassifierWrapper::FernClassifierWrapper(const vector<vector<Point2f> > &points,
71  const vector<Mat> &referenceImages,
72  const vector<vector<int> > &labels,
73  int _nclasses, int _patchSize,
74  int _signatureSize, int _nstructs,
75  int _structSize, int _nviews,
76  int _compressionMethod,
77  const PatchGenerator &patchGenerator)
78  : FernClassifier(points, referenceImages, labels, _nclasses, _patchSize, _signatureSize,
79  _nstructs, _structSize, _nviews, _compressionMethod, patchGenerator)
80 {
81 }
82 
84 {
85 }
86 
87 void FernClassifierWrapper::readBinary(std::fstream &stream)
88 {
89  clear();
90 
91  stream.read((char *)&verbose, sizeof(verbose));
92  stream.read((char *)&nstructs, sizeof(nstructs));
93  stream.read((char *)&structSize, sizeof(structSize));
94  stream.read((char *)&nclasses, sizeof(nclasses));
95  stream.read((char *)&signatureSize, sizeof(signatureSize));
96  stream.read((char *)&compressionMethod, sizeof(compressionMethod));
97  stream.read((char *)&leavesPerStruct, sizeof(leavesPerStruct));
98  stream.read((char *)&patchSize.width, sizeof(patchSize.width));
99  stream.read((char *)&patchSize.height, sizeof(patchSize.height));
100 
101  std::vector<Feature>::size_type featuresSize;
102  stream.read((char *)&featuresSize, sizeof(featuresSize));
103  features.reserve(featuresSize);
104  unsigned int featuresValue;
105  Feature value;
106  for (std::vector<Feature>::size_type i = 0; i < featuresSize; ++i) {
107  stream.read((char *)&featuresValue, sizeof(featuresValue));
108  value.x1 = (uchar)(featuresValue % patchSize.width);
109  value.y1 = (uchar)(featuresValue / patchSize.width);
110  stream.read((char *)&featuresValue, sizeof(featuresValue));
111  value.x2 = (uchar)(featuresValue % patchSize.width);
112  value.y2 = (uchar)(featuresValue / patchSize.width);
113  features.push_back(value);
114  }
115 
116  // don't read classCounters
117  /*
118  std::vector<int>::size_type classCountersSize;
119  stream.read((char *)&classCountersSize, sizeof(classCountersSize));
120  classCounters.reserve(classCountersSize);
121  int classCountersValue;
122  for (std::vector<int>::size_type i = 0; i < classCountersSize; ++i) {
123  stream.read((char *)&classCountersValue, sizeof(classCountersValue));
124  classCounters.push_back(classCountersValue);
125  }
126  */
127 
128  std::vector<float>::size_type posteriorsSize;
129  stream.read((char *)&posteriorsSize, sizeof(posteriorsSize));
130  posteriors.reserve(posteriorsSize);
131  float posteriorsValue;
132  for (std::vector<float>::size_type i = 0; i < posteriorsSize; ++i) {
133  stream.read((char *)&posteriorsValue, sizeof(posteriorsValue));
134  posteriors.push_back(posteriorsValue);
135  }
136 }
137 
138 void FernClassifierWrapper::writeBinary(std::fstream &stream) const
139 {
140  stream.write((char *)&verbose, sizeof(verbose));
141  stream.write((char *)&nstructs, sizeof(nstructs));
142  stream.write((char *)&structSize, sizeof(structSize));
143  stream.write((char *)&nclasses, sizeof(nclasses));
144  stream.write((char *)&signatureSize, sizeof(signatureSize));
145  stream.write((char *)&compressionMethod, sizeof(compressionMethod));
146  stream.write((char *)&leavesPerStruct, sizeof(leavesPerStruct));
147  stream.write((char *)&patchSize.width, sizeof(patchSize.width));
148  stream.write((char *)&patchSize.height, sizeof(patchSize.height));
149 
150  std::vector<Feature>::size_type featuresSize = features.size();
151  stream.write((char *)&featuresSize, sizeof(featuresSize));
152  unsigned int featuresValue;
153  for (std::vector<Feature>::const_iterator itr = features.begin(); itr != features.end(); ++itr) {
154  featuresValue = itr->y1 * patchSize.width + itr->x1;
155  stream.write((char *)&featuresValue, sizeof(featuresValue));
156  featuresValue = itr->y2 * patchSize.width + itr->x2;
157  stream.write((char *)&featuresValue, sizeof(featuresValue));
158  }
159 
160  // don't write classCounters
161  /*
162  std::vector<int>::size_type classCountersSize = classCounters.size();
163  stream.write((char *)&classCountersSize, sizeof(classCountersSize));
164  for (std::vector<int>::const_iterator itr = classCounters.begin(); itr != classCounters.end(); ++itr) {
165  stream.write((char *)&*itr, sizeof(*itr));
166  }
167  */
168 
169  std::vector<float>::size_type posteriorsSize = posteriors.size();
170  stream.write((char *)&posteriorsSize, sizeof(posteriorsSize));
171  for (std::vector<float>::const_iterator itr = posteriors.begin(); itr != posteriors.end(); ++itr) {
172  stream.write((char *)&*itr, sizeof(*itr));
173  }
174 }
175 
177  : mPatchGenerator(0, 256, 13, true, /*0.25*/0.10, 1.0/*0.6, 1.5*/, -CV_PI*1.0, CV_PI*1.0, -CV_PI*0.0, CV_PI*0.0/*-2*CV_PI, 2*CV_PI*/) // TODO: check angle values, cant be -2pi..2pi ?
178  , mLDetector(3, 20, PYR_LEVELS, N_VIEWS, PATCH_SIZE, 2)
179  , mClassifier()
180  , mKeyPoints()
181  , mImagePoints()
182  , mModelPoints()
183  , mVisualize(visualize)
184  , mObjects()
185  , mSize()
186  , mCorrespondences()
187  , mHomography()
188  , mInlierRatio(0)
189 {
190  //mHomography.eye(3, 3, CV_64F);
191  mClassifier.resize(1);
192 }
193 
195 {
196 }
197 
198 void FernImageDetector::imagePoints(vector<CvPoint2D64f> &points)
199 {
200  points.clear();
201  for(size_t i = 0; i < mImagePoints.size(); ++i) {
202  points.push_back(cvPoint2D64f(mImagePoints[i].x, mImagePoints[i].y));
203  }
204 }
205 
206 void FernImageDetector::modelPoints(vector<CvPoint3D64f> &points, bool normalize)
207 {
208  points.clear();
209  //int minx = 1e10, miny = 1e10;
210  //int maxx = 0, maxy = 0;
211  for(size_t i = 0; i < mModelPoints.size(); ++i) {
212  CvPoint3D64f pt = cvPoint3D64f(mModelPoints[i].x, mModelPoints[i].y, 0.0);
213  if(normalize) {
214  //minx = (pt.x<minx)?pt.x:minx;
215  //miny = (pt.y<miny)?pt.y:miny;
216  //maxx = (pt.x>maxx)?pt.x:maxx;
217  //maxy = (pt.y>maxy)?pt.y:maxy;
218  pt.x -= mSize.width/2;
219  pt.y -= mSize.height/2;
220  pt.x /= mSize.width*0.10;
221  pt.y /= mSize.width*0.10;
222  }
223  points.push_back(pt);
224  }
225 }
226 
228 {
229  return mSize;
230 }
231 
233 {
234  return mHomography;
235 }
236 
238 {
239  return mInlierRatio;
240 }
241 
242 void FernImageDetector::train(const std::string &filename)
243 {
244  Mat object = imread(filename.c_str(), CV_LOAD_IMAGE_GRAYSCALE);
245  train(object);
246 }
247 
/**
 * Trains the detector from an in-memory image.
 *
 * Stores a copy of the image in mObjects, blurs the stored reference image,
 * selects N_PTS_TO_TEACH keypoints with mLDetector.getMostStable2D() and
 * trains mClassifier[0] on the blurred image from those keypoints. Finally
 * records the image dimensions in mSize.
 *
 * When mVisualize is set, the blurred image is shown in a window named
 * "Object" (each display waits 2000 ms), first plain and then with the
 * selected keypoints drawn as circles.
 *
 * \param object Image to learn; it is cloned, not modified.
 */
248 void FernImageDetector::train(Mat &object)
249 {
250  mObjects.push_back(object.clone());
251 
252  Mat blurredObject;
 // NOTE(review): this blurs mObjects[0], i.e. the first image ever stored,
 // not necessarily the one just pushed - on a second train() call the two
 // differ. Confirm whether multiple calls are meant to be supported.
253  GaussianBlur(mObjects[0], blurredObject, Size(SIZE_BLUR, SIZE_BLUR), 0, 0);
254 
255  if(mVisualize) {
256  cvNamedWindow("Object", 1);
257  imshow("Object", blurredObject);
258  cv::waitKey(2000);
259  }
260 
261  //buildPyramid(object, objpyr, mLDetector.nOctaves-1);
262  //mLDetector(mObjects[0], mKeyPoints, N_PTS_TO_TEACH); // TODO: find robust features, TODO: in pyramids?
263  mLDetector.getMostStable2D(blurredObject, mKeyPoints, N_PTS_TO_TEACH, mPatchGenerator);
264 
265  if(mVisualize) {
 // Draws directly into blurredObject, which is also the image passed to
 // the classifier training call below.
266  for(int i = 0; i < (int)mKeyPoints.size(); ++i)
267  circle(blurredObject, mKeyPoints[i].pt, int(mKeyPoints[i].size/10), CV_RGB(64,64,64));
268 
269  imshow("Object", blurredObject);
270  cv::waitKey(2000);
271  }
272 
 // NOTE(review): the listing skips its own lines 276 and 281 here, so some
 // arguments and the closing of this call are not visible; presumably the
 // signature size and the patch generator - confirm against the real source.
273  mClassifier[0].trainFromSingleView(blurredObject,
274  mKeyPoints,
275  PATCH_SIZE,
277  N_STRUCTS, // TODO: why was (int)mKeyPoints.size(), use here? why not a constant?
278  STRUCT_SIZE,
279  N_VIEWS,
280  FernClassifier::COMPRESSION_NONE,
282 
283  mSize = cv::Size(object.cols, object.rows);
284 }
285 
286 void FernImageDetector::findFeatures(Mat &object, bool planeAssumption)
287 {
288  //cv::flip(object, object, 1);
289 
290  vector<KeyPoint> keypoints;
291  vector<Mat> objpyr;
292 
293  GaussianBlur(object, object, Size(SIZE_BLUR, SIZE_BLUR), 0, 0);
294  //buildPyramid(object, objpyr, mLDetector.nOctaves-1);
295  mLDetector.nOctaves = 1;
296  mLDetector(object/*objpyr*/, keypoints, N_PTS_TO_FIND);
297 
298  int m = mKeyPoints.size();
299  int n = keypoints.size();
300  vector<int> bestMatches(m, -1);
301  vector<float> maxLogProb(m, -FLT_MAX);
302  vector<float> signature;
303  vector<int> pairs;
304 
305  for(size_t i = 0; i < keypoints.size(); ++i) {
306  Point2f pt = keypoints[i].pt;
307  //int oct = keypoints[i].octave; std::cout<<"oct "<<oct<<std::endl;
308  int k = mClassifier[0](object /*objpyr[oct]*/, pt, signature);
309  if(k >= 0 && (bestMatches[k] < 0 || signature[k] > maxLogProb[k])) {
310  maxLogProb[k] = signature[k];
311  bestMatches[k] = i;
312  }
313  }
314 
315  for(int i = 0; i < m; i++ )
316  if(bestMatches[i] >= 0) {
317  pairs.push_back(i);
318  pairs.push_back(bestMatches[i]);
319  }
320 
321  mCorrespondences = Mat(mObjects[0].rows + object.rows, std::max( mObjects[0].cols, object.cols), CV_8UC3);
322  mCorrespondences = Scalar(0.);
323  Mat part(mCorrespondences, Rect(0, 0, mObjects[0].cols, mObjects[0].rows));
324  cvtColor(mObjects[0], part, CV_GRAY2BGR);
325  part = Mat(mCorrespondences, Rect(0, mObjects[0].rows, object.cols, object.rows));
326  cvtColor(object, part, CV_GRAY2BGR);
327 
328  for(int i = 0; i < (int)keypoints.size(); ++i)
329  circle(object, keypoints[i].pt, int(keypoints[i].size/5), CV_RGB(64,64,64));
330 
331  vector<Point2f> fromPt, toPt;
332  vector<uchar> mask;
333  for(int i = 0; i < m; ++i)
334  if( bestMatches[i] >= 0 ){
335  fromPt.push_back(mKeyPoints[i].pt);
336  toPt.push_back(keypoints[bestMatches[i]].pt);
337  }
338 
339  static double valmin = 1.0;
340  static double valmax = 0.0;
341  mModelPoints.clear();
342  mImagePoints.clear();
343  int n_inliers = 0;
344 
345  if(planeAssumption && fromPt.size() > 8) {
346  cv::Mat H = cv::findHomography(Mat(fromPt), Mat(toPt), mask, RANSAC/*CV_LMEDS*/, 20);
347  mHomography = H;
348  //CompareModelAndObservation();
349 
350  for(size_t i = 0, j = 0; i < (int)pairs.size(); i += 2, ++j) {
351  if(mask[j]) {
352  cv::Point2f pi(keypoints[pairs[i+1]].pt);
353  cv::Point2f pw(mKeyPoints[pairs[i]].pt);
354  mModelPoints.push_back(pw);
355  mImagePoints.push_back(pi);
356  line(mCorrespondences, mKeyPoints[pairs[i]].pt,
357  keypoints[pairs[i+1]].pt + Point2f(0.0,(float)mObjects[0].rows),
358  Scalar(i*i%244,100-i*100%30,i*i-50*i));
359  n_inliers++;
360  }
361  }
362  } else {
363  for(size_t i = 0, j = 0; i < (int)pairs.size(); i += 2, ++j) {
364  cv::Point2f pi(keypoints[pairs[i+1]].pt);
365  cv::Point2f pw(mKeyPoints[pairs[i]].pt);
366  mModelPoints.push_back(pw);
367  mImagePoints.push_back(pi);
368  line(mCorrespondences, mKeyPoints[pairs[i]].pt,
369  keypoints[pairs[i+1]].pt + Point2f(0.0,(float)mObjects[0].rows),
370  Scalar(i*i%244,100-i*100%30,i*i-50*i));
371  }
372  }
373 
374 
375  double val = 0.0;
376  if(fromPt.size()>0) val = 1.*n_inliers/fromPt.size();
377  if(val > valmax) valmax = val;
378  if(val < valmin) valmin = val;
379 
380  mInlierRatio = val;
381 
382  if (mVisualize) {
383  cvNamedWindow("Matches", 1);
384  imshow("Matches", mCorrespondences);
385  cv::waitKey(1);
386  }
387 }
388 
389 bool FernImageDetector::read(const std::string &filename, const bool binary)
390 {
391  if (binary) {
392  std::fstream bs(filename.c_str(), std::fstream::in | std::fstream::binary);
393 
394  if (!bs.is_open()) {
395  return false;
396  }
397 
398  bs.read((char *)&mLDetector.radius, sizeof(mLDetector.radius));
399  bs.read((char *)&mLDetector.threshold, sizeof(mLDetector.threshold));
400  bs.read((char *)&mLDetector.nOctaves, sizeof(mLDetector.nOctaves));
401  bs.read((char *)&mLDetector.nViews, sizeof(mLDetector.nViews));
402  bs.read((char *)&mLDetector.verbose, sizeof(mLDetector.verbose));
403  bs.read((char *)&mLDetector.baseFeatureSize, sizeof(mLDetector.baseFeatureSize));
404  bs.read((char *)&mLDetector.clusteringDistance, sizeof(mLDetector.clusteringDistance));
405 
406  mClassifier[0].readBinary(bs);
407 
408  std::vector<float>::size_type size;
409  bs.read((char *)&size, sizeof(size));
410  mKeyPoints.reserve(size);
411  KeyPoint value;
412  for (std::vector<float>::size_type i = 0; i < size; ++i) {
413  bs.read((char *)&value.pt.x, sizeof(value.pt.x));
414  bs.read((char *)&value.pt.y, sizeof(value.pt.y));
415  bs.read((char *)&value.size, sizeof(value.size));
416  bs.read((char *)&value.angle, sizeof(value.angle));
417  bs.read((char *)&value.response, sizeof(value.response));
418  bs.read((char *)&value.octave, sizeof(value.octave));
419  bs.read((char *)&value.class_id, sizeof(value.class_id));
420  mKeyPoints.push_back(value);
421  }
422 
423  bs.read((char *)&mSize.width, sizeof(mSize.width));
424  bs.read((char *)&mSize.height, sizeof(mSize.height));
425 
426  std::vector<Mat>::size_type objectsSize;
427  bs.read((char *)&objectsSize, sizeof(objectsSize));
428  mObjects.reserve(objectsSize);
429  int rows;
430  int cols;
431  int type;
432  for (std::vector<Mat>::size_type i = 0; i < objectsSize; ++i) {
433  bs.read((char *)&rows, sizeof(rows));
434  bs.read((char *)&cols, sizeof(cols));
435  bs.read((char *)&type, sizeof(type));
436  Mat objectsValue(rows, cols, type);
437  bs.read((char *)objectsValue.data, objectsValue.elemSize() * objectsValue.total());
438  mObjects.push_back(objectsValue);
439  }
440 
441  bs.close();
442  }
443  else {
444  FileStorage fs(filename, FileStorage::READ);
445 
446  if (!fs.isOpened()) {
447  return false;
448  }
449 
450  FileNode node = fs.getFirstTopLevelNode();
451  std::cout << "loaded file" << std::endl;
452  cv::read(node["model_points"], mKeyPoints);
453  std::cout << "loaded model points" << std::endl;
454  mClassifier[0].read(node["fern_classifier"]);
455  std::cout << "loaded classifier" << std::endl;
456  }
457 
458  return true;
459 }
460 
461 bool FernImageDetector::write(const std::string &filename, const bool binary)
462 {
463  if (binary) {
464  std::fstream bs(filename.c_str(), std::fstream::out | std::fstream::binary);
465 
466  if (!bs.is_open()) {
467  return false;
468  }
469 
470  bs.write((char *)&mLDetector.radius, sizeof(mLDetector.radius));
471  bs.write((char *)&mLDetector.threshold, sizeof(mLDetector.threshold));
472  bs.write((char *)&mLDetector.nOctaves, sizeof(mLDetector.nOctaves));
473  bs.write((char *)&mLDetector.nViews, sizeof(mLDetector.nViews));
474  bs.write((char *)&mLDetector.verbose, sizeof(mLDetector.verbose));
475  bs.write((char *)&mLDetector.baseFeatureSize, sizeof(mLDetector.baseFeatureSize));
476  bs.write((char *)&mLDetector.clusteringDistance, sizeof(mLDetector.clusteringDistance));
477 
478  mClassifier[0].writeBinary(bs);
479 
480  std::vector<float>::size_type size = mKeyPoints.size();
481  bs.write((char *)&size, sizeof(size));
482  for (std::vector<KeyPoint>::const_iterator itr = mKeyPoints.begin(); itr != mKeyPoints.end(); ++itr) {
483  bs.write((char *)&itr->pt.x, sizeof(itr->pt.x));
484  bs.write((char *)&itr->pt.y, sizeof(itr->pt.y));
485  bs.write((char *)&itr->size, sizeof(itr->size));
486  bs.write((char *)&itr->angle, sizeof(itr->angle));
487  bs.write((char *)&itr->response, sizeof(itr->response));
488  bs.write((char *)&itr->octave, sizeof(itr->octave));
489  bs.write((char *)&itr->class_id, sizeof(itr->class_id));
490  }
491 
492  bs.write((char *)&mSize.width, sizeof(mSize.width));
493  bs.write((char *)&mSize.height, sizeof(mSize.height));
494 
495  std::vector<Mat>::size_type objectsSize = mObjects.size();
496  bs.write((char *)&objectsSize, sizeof(objectsSize));
497  for (std::vector<Mat>::const_iterator itr = mObjects.begin(); itr != mObjects.end(); ++itr) {
498  bs.write((char *)&itr->rows, sizeof(itr->rows));
499  bs.write((char *)&itr->cols, sizeof(itr->cols));
500  int type = itr->type();
501  bs.write((char *)&type, sizeof(type));
502  bs.write((char *)itr->data, itr->elemSize() * itr->total());
503  }
504 
505  bs.close();
506  }
507  else {
508  FileStorage fs(filename, FileStorage::WRITE);
509 
510  if (!fs.isOpened()) {
511  return false;
512  }
513 
514  WriteStructContext ws(fs, "fern_image_detector", CV_NODE_MAP);
515  cv::write(fs, "model_points", mKeyPoints);
516  mClassifier[0].write(fs, "fern_classifier");
517  }
518 
519  return true;
520 }
521 
522 } // namespace alvar
Main ALVAR namespace.
Definition: Alvar.h:174
This file implements a Fern-based image detector.
#define SIZE_BLUR
virtual void readBinary(std::fstream &stream)
std::vector< Mat > mObjects
#define N_VIEWS
int visualize
void modelPoints(vector< CvPoint3D64f > &points, bool normalize=true)
#define PATCH_SIZE
FernImageDetector(const bool visualize=false)
TFSIMD_FORCE_INLINE const tfScalar & y() const
#define PYR_LEVELS
void imagePoints(vector< CvPoint2D64f > &points)
CvImagePtr cvtColor(const CvImageConstPtr &source, const std::string &encoding)
#define N_PTS_TO_FIND
TFSIMD_FORCE_INLINE const tfScalar & x() const
std::vector< FernClassifierWrapper > mClassifier
bool read(const std::string &filename, const bool binary=true)
vector< cv::Point2f > mModelPoints
vector< KeyPoint > mKeyPoints
#define N_STRUCTS
PatchGenerator mPatchGenerator
#define N_PTS_TO_TEACH
#define SIGNATURE_SIZE
#define STRUCT_SIZE
void findFeatures(Mat &image, bool planeAssumption=true)
bool write(const std::string &filename, const bool binary=true)
vector< cv::Point2f > mImagePoints
virtual void writeBinary(std::fstream &stream) const
void train(const std::string &filename)


ar_track_alvar
Author(s): Scott Niekum
autogenerated on Mon Jun 10 2019 12:47:04