apriltag_quad_thresh.c
Go to the documentation of this file.
1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 
4 This software was developed in the APRIL Robotics Lab under the
5 direction of Edwin Olson, ebolson@umich.edu. This software may be
6 available under alternative licensing terms; contact the address above.
7 
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10 
11 1. Redistributions of source code must retain the above copyright notice, this
12  list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright notice,
14  this list of conditions and the following disclaimer in the documentation
15  and/or other materials provided with the distribution.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 The views and conclusions contained in the software and documentation are those
29 of the authors and should not be interpreted as representing official policies,
30 either expressed or implied, of the Regents of The University of Michigan.
31 */
32 
33 // limitation: image size must be <32768 in width and height. This is
34 // because we use a fixed-point 16 bit integer representation with one
35 // fractional bit.
36 #include <math.h>
37 #include <assert.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 
42 #include "apriltag.h"
43 #include "common/image_u8x3.h"
44 #include "common/zarray.h"
45 #include "common/zhash.h"
46 #include "common/unionfind.h"
47 #include "common/timeprofile.h"
48 #include "common/zmaxheap.h"
50 #include "common/math_util.h"
51 
52 static inline uint32_t u64hash_2(uint64_t x) {
53  return (2654435761 * x) >> 32;
54  return (uint32_t) x;
55 }
56 
58 {
59  uint64_t id;
61 
63 };
64 
65 #ifndef M_PI
66 # define M_PI 3.141592653589793238462643383279502884196
67 #endif
68 
69 struct pt
70 {
71  // Note: these represent 2*actual value.
72  uint16_t x, y;
73  float theta;
74  int16_t gx, gy;
75 };
76 
78 {
79  int y0, y1;
80  int w, h, s;
83 };
84 
85 struct quad_task
86 {
88  int cidx0, cidx1; // [cidx0, cidx1)
91  int w, h;
92 
94 };
95 
97 {
98  int i; // which vertex to remove?
99  int left, right; // left vertex, right vertex
100 
101  double err;
102 };
103 
104 struct segment
105 {
107 
108  // always greater than zero, but right can be > size, which denotes
109  // a wrap around back to the beginning of the points. and left < right.
110  int left, right;
111 };
112 
114 {
115  double Mx, My;
116  double Mxx, Myy, Mxy;
117  double W; // total weight
118 };
119 
120 static inline void ptsort(struct pt *pts, int sz)
121 {
122 #define MAYBE_SWAP(arr,apos,bpos) \
123  if (arr[apos].theta > arr[bpos].theta) { \
124  tmp = arr[apos]; arr[apos] = arr[bpos]; arr[bpos] = tmp; \
125  };
126 
127  if (sz <= 1)
128  return;
129 
130  if (sz == 2) {
131  struct pt tmp;
132  MAYBE_SWAP(pts, 0, 1);
133  return;
134  }
135 
136  // NB: Using less-branch-intensive sorting networks here on the
137  // hunch that it's better for performance.
138  if (sz == 3) { // 3 element bubble sort is optimal
139  struct pt tmp;
140  MAYBE_SWAP(pts, 0, 1);
141  MAYBE_SWAP(pts, 1, 2);
142  MAYBE_SWAP(pts, 0, 1);
143  return;
144  }
145 
146  if (sz == 4) { // 4 element optimal sorting network.
147  struct pt tmp;
148  MAYBE_SWAP(pts, 0, 1); // sort each half, like a merge sort
149  MAYBE_SWAP(pts, 2, 3);
150  MAYBE_SWAP(pts, 0, 2); // minimum value is now at 0.
151  MAYBE_SWAP(pts, 1, 3); // maximum value is now at end.
152  MAYBE_SWAP(pts, 1, 2); // that only leaves the middle two.
153  return;
154  }
155 
156  if (sz == 5) {
157  // this 9-step swap is optimal for a sorting network, but two
158  // steps slower than a generic sort.
159  struct pt tmp;
160  MAYBE_SWAP(pts, 0, 1); // sort each half (3+2), like a merge sort
161  MAYBE_SWAP(pts, 3, 4);
162  MAYBE_SWAP(pts, 1, 2);
163  MAYBE_SWAP(pts, 0, 1);
164  MAYBE_SWAP(pts, 0, 3); // minimum element now at 0
165  MAYBE_SWAP(pts, 2, 4); // maximum element now at end
166  MAYBE_SWAP(pts, 1, 2); // now resort the three elements 1-3.
167  MAYBE_SWAP(pts, 2, 3);
168  MAYBE_SWAP(pts, 1, 2);
169  return;
170  }
171 
172 #undef MAYBE_SWAP
173 
174  // a merge sort with temp storage.
175 
176  // Use stack storage if it's not too big.
177  int stacksz = sz;
178  if (stacksz > 1024)
179  stacksz = 0;
180 
181  struct pt _tmp_stack[stacksz];
182  struct pt *tmp = _tmp_stack;
183 
184  if (stacksz == 0) {
185  // it was too big, malloc it instead.
186  tmp = malloc(sizeof(struct pt) * sz);
187  }
188 
189  memcpy(tmp, pts, sizeof(struct pt) * sz);
190 
191  int asz = sz/2;
192  int bsz = sz - asz;
193 
194  struct pt *as = &tmp[0];
195  struct pt *bs = &tmp[asz];
196 
197  ptsort(as, asz);
198  ptsort(bs, bsz);
199 
200 #define MERGE(apos,bpos) \
201  if (as[apos].theta < bs[bpos].theta) \
202  pts[outpos++] = as[apos++]; \
203  else \
204  pts[outpos++] = bs[bpos++];
205 
206  int apos = 0, bpos = 0, outpos = 0;
207  while (apos + 8 < asz && bpos + 8 < bsz) {
208  MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos);
209  MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos); MERGE(apos,bpos);
210  }
211 
212  while (apos < asz && bpos < bsz) {
213  MERGE(apos,bpos);
214  }
215 
216  if (apos < asz)
217  memcpy(&pts[outpos], &as[apos], (asz-apos)*sizeof(struct pt));
218  if (bpos < bsz)
219  memcpy(&pts[outpos], &bs[bpos], (bsz-bpos)*sizeof(struct pt));
220 
221  if (stacksz == 0)
222  free(tmp);
223 
224 #undef MERGE
225 }
226 
227 // lfps contains *cumulative* moments for N points, with
228 // index j reflecting points [0,j] (inclusive).
229 //
230 // fit a line to the points [i0, i1] (inclusive). i0, i1 are both [0,
231 // sz) if i1 < i0, we treat this as a wrap around.
232 void fit_line(struct line_fit_pt *lfps, int sz, int i0, int i1, double *lineparm, double *err, double *mse)
233 {
234  assert(i0 != i1);
235  assert(i0 >= 0 && i1 >= 0 && i0 < sz && i1 < sz);
236 
237  double Mx, My, Mxx, Myy, Mxy, W;
238  int N; // how many points are included in the set?
239 
240  if (i0 < i1) {
241  N = i1 - i0 + 1;
242 
243  Mx = lfps[i1].Mx;
244  My = lfps[i1].My;
245  Mxx = lfps[i1].Mxx;
246  Mxy = lfps[i1].Mxy;
247  Myy = lfps[i1].Myy;
248  W = lfps[i1].W;
249 
250  if (i0 > 0) {
251  Mx -= lfps[i0-1].Mx;
252  My -= lfps[i0-1].My;
253  Mxx -= lfps[i0-1].Mxx;
254  Mxy -= lfps[i0-1].Mxy;
255  Myy -= lfps[i0-1].Myy;
256  W -= lfps[i0-1].W;
257  }
258 
259  } else {
260  // i0 > i1, e.g. [15, 2]. Wrap around.
261  assert(i0 > 0);
262 
263  Mx = lfps[sz-1].Mx - lfps[i0-1].Mx;
264  My = lfps[sz-1].My - lfps[i0-1].My;
265  Mxx = lfps[sz-1].Mxx - lfps[i0-1].Mxx;
266  Mxy = lfps[sz-1].Mxy - lfps[i0-1].Mxy;
267  Myy = lfps[sz-1].Myy - lfps[i0-1].Myy;
268  W = lfps[sz-1].W - lfps[i0-1].W;
269 
270  Mx += lfps[i1].Mx;
271  My += lfps[i1].My;
272  Mxx += lfps[i1].Mxx;
273  Mxy += lfps[i1].Mxy;
274  Myy += lfps[i1].Myy;
275  W += lfps[i1].W;
276 
277  N = sz - i0 + i1 + 1;
278  }
279 
280  assert(N >= 2);
281 
282  double Ex = Mx / W;
283  double Ey = My / W;
284  double Cxx = Mxx / W - Ex*Ex;
285  double Cxy = Mxy / W - Ex*Ey;
286  double Cyy = Myy / W - Ey*Ey;
287 
288  double nx, ny;
289 
290  if (1) {
291  // on iOS about 5% of total CPU spent in these trig functions.
292  // 85 ms per frame on 5S, example.pnm
293  //
294  // XXX this was using the double-precision atan2. Was there a case where
295  // we needed that precision? Seems doubtful.
296  double normal_theta = .5 * atan2f(-2*Cxy, (Cyy - Cxx));
297  nx = cosf(normal_theta);
298  ny = sinf(normal_theta);
299  } else {
300  // 73.5 ms per frame on 5S, example.pnm
301  double ty = -2*Cxy;
302  double tx = (Cyy - Cxx);
303  double mag = ty*ty + tx*tx;
304 
305  if (mag == 0) {
306  nx = 1;
307  ny = 0;
308  } else {
309  double norm = sqrtf(ty*ty + tx*tx);
310  tx /= norm;
311 
312  // ty is now sin(2theta)
313  // tx is now cos(2theta). We want sin(theta) and cos(theta)
314 
315  // due to precision err, tx could still have slightly too large magnitude.
316  if (tx > 1) {
317  ny = 0;
318  nx = 1;
319  } else if (tx < -1) {
320  ny = 1;
321  nx = 0;
322  } else {
323  // half angle formula
324  ny = sqrtf((1 - tx)/2);
325  nx = sqrtf((1 + tx)/2);
326 
327  // pick a consistent branch cut
328  if (ty < 0)
329  ny = - ny;
330  }
331  }
332  }
333 
334  if (lineparm) {
335  lineparm[0] = Ex;
336  lineparm[1] = Ey;
337  lineparm[2] = nx;
338  lineparm[3] = ny;
339  }
340 
341  // sum of squared errors =
342  //
343  // SUM_i ((p_x - ux)*nx + (p_y - uy)*ny)^2
344  // SUM_i nx*nx*(p_x - ux)^2 + 2nx*ny(p_x -ux)(p_y-uy) + ny*ny*(p_y-uy)*(p_y-uy)
345  // nx*nx*SUM_i((p_x -ux)^2) + 2nx*ny*SUM_i((p_x-ux)(p_y-uy)) + ny*ny*SUM_i((p_y-uy)^2)
346  //
347  // nx*nx*N*Cxx + 2nx*ny*N*Cxy + ny*ny*N*Cyy
348 
349  // sum of squared errors
350  if (err)
351  *err = nx*nx*N*Cxx + 2*nx*ny*N*Cxy + ny*ny*N*Cyy;
352 
353  // mean squared error
354  if (mse)
355  *mse = nx*nx*Cxx + 2*nx*ny*Cxy + ny*ny*Cyy;
356 }
357 
358 int pt_compare_theta(const void *_a, const void *_b)
359 {
360  struct pt *a = (struct pt*) _a;
361  struct pt *b = (struct pt*) _b;
362 
363  return (a->theta < b->theta) ? -1 : 1;
364 }
365 
366 int err_compare_descending(const void *_a, const void *_b)
367 {
368  const double *a = _a;
369  const double *b = _b;
370 
371  return ((*a) < (*b)) ? 1 : -1;
372 }
373 
374 /*
375 
376  1. Identify A) white points near a black point and B) black points near a white point.
377 
378  2. Find the connected components within each of the classes above,
379  yielding clusters of "white-near-black" and
380  "black-near-white". (These two classes are kept separate). Each
381  segment has a unique id.
382 
383  3. For every pair of "white-near-black" and "black-near-white"
384  clusters, find the set of points that are in one and adjacent to the
385  other. In other words, a "boundary" layer between the two
386  clusters. (This is actually performed by iterating over the pixels,
387  rather than pairs of clusters.) Critically, this helps keep nearby
388  edges from becoming connected.
389 */
390 int quad_segment_maxima(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
391 {
392  int sz = zarray_size(cluster);
393 
394  // ksz: when fitting points, how many points on either side do we consider?
395  // (actual "kernel" width is 2ksz).
396  //
397  // This value should be about: 0.5 * (points along shortest edge).
398  //
399  // If all edges were equally-sized, that would give a value of
400  // sz/8. We make it somewhat smaller to account for tags at high
401  // aspects.
402 
403  // XXX Tunable. Maybe make a multiple of JPEG block size to increase robustness
404  // to JPEG compression artifacts?
405  int ksz = imin(20, sz / 12);
406 
407  // can't fit a quad if there are too few points.
408  if (ksz < 2)
409  return 0;
410 
411 // printf("sz %5d, ksz %3d\n", sz, ksz);
412 
413  double errs[sz];
414 
415  for (int i = 0; i < sz; i++) {
416  fit_line(lfps, sz, (i + sz - ksz) % sz, (i + ksz) % sz, NULL, &errs[i], NULL);
417  }
418 
419  // apply a low-pass filter to errs
420  if (1) {
421  double y[sz];
422 
423  // how much filter to apply?
424 
425  // XXX Tunable
426  double sigma = 1; // was 3
427 
428  // cutoff = exp(-j*j/(2*sigma*sigma));
429  // log(cutoff) = -j*j / (2*sigma*sigma)
430  // log(cutoff)*2*sigma*sigma = -j*j;
431 
432  // how big a filter should we use? We make our kernel big
433  // enough such that we represent any values larger than
434  // 'cutoff'.
435 
436  // XXX Tunable (though not super useful to change)
437  double cutoff = 0.05;
438  int fsz = sqrt(-log(cutoff)*2*sigma*sigma) + 1;
439  fsz = 2*fsz + 1;
440 
441  // For default values of cutoff = 0.05, sigma = 3,
442  // we have fsz = 17.
443  float f[fsz];
444 
445  for (int i = 0; i < fsz; i++) {
446  int j = i - fsz / 2;
447  f[i] = exp(-j*j/(2*sigma*sigma));
448  }
449 
450  for (int iy = 0; iy < sz; iy++) {
451  double acc = 0;
452 
453  for (int i = 0; i < fsz; i++) {
454  acc += errs[(iy + i - fsz / 2 + sz) % sz] * f[i];
455  }
456  y[iy] = acc;
457  }
458 
459  memcpy(errs, y, sizeof(y));
460  }
461 
462  int maxima[sz];
463  double maxima_errs[sz];
464  int nmaxima = 0;
465 
466  for (int i = 0; i < sz; i++) {
467  if (errs[i] > errs[(i+1)%sz] && errs[i] > errs[(i+sz-1)%sz]) {
468  maxima[nmaxima] = i;
469  maxima_errs[nmaxima] = errs[i];
470  nmaxima++;
471  }
472  }
473 
474  // if we didn't get at least 4 maxima, we can't fit a quad.
475  if (nmaxima < 4)
476  return 0;
477 
478  // select only the best maxima if we have too many
479  int max_nmaxima = td->qtp.max_nmaxima;
480 
481  if (nmaxima > max_nmaxima) {
482  double maxima_errs_copy[nmaxima];
483  memcpy(maxima_errs_copy, maxima_errs, sizeof(maxima_errs_copy));
484 
485  // throw out all but the best handful of maxima. Sorts descending.
486  qsort(maxima_errs_copy, nmaxima, sizeof(double), err_compare_descending);
487 
488  double maxima_thresh = maxima_errs_copy[max_nmaxima];
489  int out = 0;
490  for (int in = 0; in < nmaxima; in++) {
491  if (maxima_errs[in] <= maxima_thresh)
492  continue;
493  maxima[out++] = maxima[in];
494  }
495  nmaxima = out;
496  }
497 
498  int best_indices[4];
499  double best_error = HUGE_VALF;
500 
501  double err01, err12, err23, err30;
502  double mse01, mse12, mse23, mse30;
503  double params01[4], params12[4], params23[4], params30[4];
504 
505  // disallow quads where the angle is less than a critical value.
506  double max_dot = cos(td->qtp.critical_rad); //25*M_PI/180);
507 
508  for (int m0 = 0; m0 < nmaxima - 3; m0++) {
509  int i0 = maxima[m0];
510 
511  for (int m1 = m0+1; m1 < nmaxima - 2; m1++) {
512  int i1 = maxima[m1];
513 
514  fit_line(lfps, sz, i0, i1, params01, &err01, &mse01);
515 
516  if (mse01 > td->qtp.max_line_fit_mse)
517  continue;
518 
519  for (int m2 = m1+1; m2 < nmaxima - 1; m2++) {
520  int i2 = maxima[m2];
521 
522  fit_line(lfps, sz, i1, i2, params12, &err12, &mse12);
523  if (mse12 > td->qtp.max_line_fit_mse)
524  continue;
525 
526  double dot = params01[2]*params12[2] + params01[3]*params12[3];
527  if (fabs(dot) > max_dot)
528  continue;
529 
530  for (int m3 = m2+1; m3 < nmaxima; m3++) {
531  int i3 = maxima[m3];
532 
533  fit_line(lfps, sz, i2, i3, params23, &err23, &mse23);
534  if (mse23 > td->qtp.max_line_fit_mse)
535  continue;
536 
537  fit_line(lfps, sz, i3, i0, params30, &err30, &mse30);
538  if (mse30 > td->qtp.max_line_fit_mse)
539  continue;
540 
541  double err = err01 + err12 + err23 + err30;
542  if (err < best_error) {
543  best_error = err;
544  best_indices[0] = i0;
545  best_indices[1] = i1;
546  best_indices[2] = i2;
547  best_indices[3] = i3;
548  }
549  }
550  }
551  }
552  }
553 
554  if (best_error == HUGE_VALF)
555  return 0;
556 
557  for (int i = 0; i < 4; i++)
558  indices[i] = best_indices[i];
559 
560  if (best_error / sz < td->qtp.max_line_fit_mse)
561  return 1;
562  return 0;
563 }
564 
565 // returns 0 if the cluster looks bad.
566 int quad_segment_agg(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
567 {
568  int sz = zarray_size(cluster);
569 
570  zmaxheap_t *heap = zmaxheap_create(sizeof(struct remove_vertex*));
571 
572  // We will initially allocate sz rvs. We then have two types of
573  // iterations: some iterations that are no-ops in terms of
574  // allocations, and those that remove a vertex and allocate two
575  // more children. This will happen at most (sz-4) times. Thus we
576  // need: sz + 2*(sz-4) entries.
577 
578  int rvalloc_pos = 0;
579  int rvalloc_size = 3*sz;
580  struct remove_vertex *rvalloc = calloc(rvalloc_size, sizeof(struct remove_vertex));
581 
582  struct segment *segs = calloc(sz, sizeof(struct segment));
583 
584  // populate with initial entries
585  for (int i = 0; i < sz; i++) {
586  struct remove_vertex *rv = &rvalloc[rvalloc_pos++];
587  rv->i = i;
588  if (i == 0) {
589  rv->left = sz-1;
590  rv->right = 1;
591  } else {
592  rv->left = i-1;
593  rv->right = (i+1) % sz;
594  }
595 
596  fit_line(lfps, sz, rv->left, rv->right, NULL, NULL, &rv->err);
597 
598  zmaxheap_add(heap, &rv, -rv->err);
599 
600  segs[i].left = rv->left;
601  segs[i].right = rv->right;
602  segs[i].is_vertex = 1;
603  }
604 
605  int nvertices = sz;
606 
607  while (nvertices > 4) {
608  assert(rvalloc_pos < rvalloc_size);
609 
610  struct remove_vertex *rv;
611  float err;
612 
613  int res = zmaxheap_remove_max(heap, &rv, &err);
614  if (!res)
615  return 0;
616  assert(res);
617 
618  // is this remove_vertex valid? (Or has one of the left/right
619  // vertices changes since we last looked?)
620  if (!segs[rv->i].is_vertex ||
621  !segs[rv->left].is_vertex ||
622  !segs[rv->right].is_vertex) {
623  continue;
624  }
625 
626  // we now merge.
627  assert(segs[rv->i].is_vertex);
628 
629  segs[rv->i].is_vertex = 0;
630  segs[rv->left].right = rv->right;
631  segs[rv->right].left = rv->left;
632 
633  // create the join to the left
634  if (1) {
635  struct remove_vertex *child = &rvalloc[rvalloc_pos++];
636  child->i = rv->left;
637  child->left = segs[rv->left].left;
638  child->right = rv->right;
639 
640  fit_line(lfps, sz, child->left, child->right, NULL, NULL, &child->err);
641 
642  zmaxheap_add(heap, &child, -child->err);
643  }
644 
645  // create the join to the right
646  if (1) {
647  struct remove_vertex *child = &rvalloc[rvalloc_pos++];
648  child->i = rv->right;
649  child->left = rv->left;
650  child->right = segs[rv->right].right;
651 
652  fit_line(lfps, sz, child->left, child->right, NULL, NULL, &child->err);
653 
654  zmaxheap_add(heap, &child, -child->err);
655  }
656 
657  // we now have one less vertex
658  nvertices--;
659  }
660 
661  free(rvalloc);
662  zmaxheap_destroy(heap);
663 
664  int idx = 0;
665  for (int i = 0; i < sz; i++) {
666  if (segs[i].is_vertex) {
667  indices[idx++] = i;
668  }
669  }
670 
671  free(segs);
672 
673  return 1;
674 }
675 
676 // return 1 if the quad looks okay, 0 if it should be discarded
678 {
679  int res = 0;
680 
681  int sz = zarray_size(cluster);
682  if (sz < 4) // can't fit a quad to less than 4 points
683  return 0;
684 
686  // Step 1. Sort points so they wrap around the center of the
687  // quad. We will constrain our quad fit to simply partition this
688  // ordered set into 4 groups.
689 
690  // compute a bounding box so that we can order the points
691  // according to their angle WRT the center.
692  int32_t xmax = 0, xmin = INT32_MAX, ymax = 0, ymin = INT32_MAX;
693 
694  for (int pidx = 0; pidx < zarray_size(cluster); pidx++) {
695  struct pt *p;
696  zarray_get_volatile(cluster, pidx, &p);
697 
698  xmax = imax(xmax, p->x);
699  xmin = imin(xmin, p->x);
700 
701  ymax = imax(ymax, p->y);
702  ymin = imin(ymin, p->y);
703  }
704 
705  // add some noise to (cx,cy) so that pixels get a more diverse set
706  // of theta estimates. This will help us remove more points.
707  // (Only helps a small amount. The actual noise values here don't
708  // matter much at all, but we want them [-1, 1]. (XXX with
709  // fixed-point, should range be bigger?)
710  double cx = (xmin + xmax) * 0.5 + 0.05118;
711  double cy = (ymin + ymax) * 0.5 + -0.028581;
712 
713  double dot = 0;
714 
715  for (int pidx = 0; pidx < zarray_size(cluster); pidx++) {
716  struct pt *p;
717  zarray_get_volatile(cluster, pidx, &p);
718 
719  double dx = p->x - cx;
720  double dy = p->y - cy;
721 
722  p->theta = atan2f(dy, dx);
723 
724  dot += dx*p->gx + dy*p->gy;
725 // p->theta = terrible_atan2(dy, dx);
726  }
727 
728  // Ensure that the black border is inside the white border.
729  if (dot < 0)
730  return 0;
731 
732  // we now sort the points according to theta. This is a prepatory
733  // step for segmenting them into four lines.
734  if (1) {
735  // zarray_sort(cluster, pt_compare_theta);
736  ptsort((struct pt*) cluster->data, zarray_size(cluster));
737 
738  // remove duplicate points. (A byproduct of our segmentation system.)
739  if (1) {
740  int outpos = 1;
741 
742  struct pt *last;
743  zarray_get_volatile(cluster, 0, &last);
744 
745  for (int i = 1; i < sz; i++) {
746 
747  struct pt *p;
748  zarray_get_volatile(cluster, i, &p);
749 
750  if (p->x != last->x || p->y != last->y) {
751 
752  if (i != outpos) {
753  struct pt *out;
754  zarray_get_volatile(cluster, outpos, &out);
755  memcpy(out, p, sizeof(struct pt));
756  }
757 
758  outpos++;
759  }
760 
761  last = p;
762  }
763 
764  cluster->size = outpos;
765  sz = outpos;
766  }
767 
768  } else {
769  // This is a counting sort in which we retain at most one
770  // point for every bucket; the bucket index is computed from
771  // theta. Since a good quad completes a complete revolution,
772  // there's reason to think that we should get a good
773  // distribution of thetas. We might "lose" a few points due
774  // to collisions, but this shouldn't affect quality very much.
775 
776  // XXX tunable. Increase to reduce the likelihood of "losing"
777  // points due to collisions.
778  int nbuckets = 4*sz;
779 
780 #define ASSOC 2
781  struct pt v[nbuckets][ASSOC];
782  memset(v, 0, sizeof(v));
783 
784  // put each point into a bucket.
785  for (int i = 0; i < sz; i++) {
786  struct pt *p;
787  zarray_get_volatile(cluster, i, &p);
788 
789  assert(p->theta >= -M_PI && p->theta <= M_PI);
790 
791  int bucket = (nbuckets - 1) * (p->theta + M_PI) / (2*M_PI);
792  assert(bucket >= 0 && bucket < nbuckets);
793 
794  for (int i = 0; i < ASSOC; i++) {
795  if (v[bucket][i].theta == 0) {
796  v[bucket][i] = *p;
797  break;
798  }
799  }
800  }
801 
802  // collect the points from the buckets and put them back into the array.
803  int outsz = 0;
804  for (int i = 0; i < nbuckets; i++) {
805  for (int j = 0; j < ASSOC; j++) {
806  if (v[i][j].theta != 0) {
807  zarray_set(cluster, outsz, &v[i][j], NULL);
808  outsz++;
809  }
810  }
811  }
812 
813  zarray_truncate(cluster, outsz);
814  sz = outsz;
815  }
816 
817  if (sz < 4)
818  return 0;
819 
821  // Step 2. Precompute statistics that allow line fit queries to be
822  // efficiently computed for any contiguous range of indices.
823 
824  struct line_fit_pt *lfps = calloc(sz, sizeof(struct line_fit_pt));
825 
826  for (int i = 0; i < sz; i++) {
827  struct pt *p;
828  zarray_get_volatile(cluster, i, &p);
829 
830  if (i > 0) {
831  memcpy(&lfps[i], &lfps[i-1], sizeof(struct line_fit_pt));
832  }
833 
834  if (0) {
835  // we now undo our fixed-point arithmetic.
836  double delta = 0.5;
837  double x = p->x * .5 + delta;
838  double y = p->y * .5 + delta;
839  double W;
840 
841  for (int dy = -1; dy <= 1; dy++) {
842  int iy = y + dy;
843 
844  if (iy < 0 || iy + 1 >= im->height)
845  continue;
846 
847  for (int dx = -1; dx <= 1; dx++) {
848  int ix = x + dx;
849 
850  if (ix < 0 || ix + 1 >= im->width)
851  continue;
852 
853  int grad_x = im->buf[iy * im->stride + ix + 1] -
854  im->buf[iy * im->stride + ix - 1];
855 
856  int grad_y = im->buf[(iy+1) * im->stride + ix] -
857  im->buf[(iy-1) * im->stride + ix];
858 
859  W = sqrtf(grad_x*grad_x + grad_y*grad_y) + 1;
860 
861 // double fx = x + dx, fy = y + dy;
862  double fx = ix + .5, fy = iy + .5;
863  lfps[i].Mx += W * fx;
864  lfps[i].My += W * fy;
865  lfps[i].Mxx += W * fx * fx;
866  lfps[i].Mxy += W * fx * fy;
867  lfps[i].Myy += W * fy * fy;
868  lfps[i].W += W;
869  }
870  }
871  } else {
872  // we now undo our fixed-point arithmetic.
873  double delta = 0.5; // adjust for pixel center bias
874  double x = p->x * .5 + delta;
875  double y = p->y * .5 + delta;
876  int ix = x, iy = y;
877  double W = 1;
878 
879  if (ix > 0 && ix+1 < im->width && iy > 0 && iy+1 < im->height) {
880  int grad_x = im->buf[iy * im->stride + ix + 1] -
881  im->buf[iy * im->stride + ix - 1];
882 
883  int grad_y = im->buf[(iy+1) * im->stride + ix] -
884  im->buf[(iy-1) * im->stride + ix];
885 
886  // XXX Tunable. How to shape the gradient magnitude?
887  W = sqrt(grad_x*grad_x + grad_y*grad_y) + 1;
888  }
889 
890  double fx = x, fy = y;
891  lfps[i].Mx += W * fx;
892  lfps[i].My += W * fy;
893  lfps[i].Mxx += W * fx * fx;
894  lfps[i].Mxy += W * fx * fy;
895  lfps[i].Myy += W * fy * fy;
896  lfps[i].W += W;
897  }
898  }
899 
900  int indices[4];
901  if (1) {
902  if (!quad_segment_maxima(td, cluster, lfps, indices))
903  goto finish;
904  } else {
905  if (!quad_segment_agg(td, cluster, lfps, indices))
906  goto finish;
907  }
908 
909 // printf("%d %d %d %d\n", indices[0], indices[1], indices[2], indices[3]);
910 
911  if (0) {
912  // no refitting here; just use those points as the vertices.
913  // Note, this is useful for debugging, but pretty bad in
914  // practice since this code path also omits several
915  // plausibility checks that save us tons of time in quad
916  // decoding.
917  for (int i = 0; i < 4; i++) {
918  struct pt *p;
919  zarray_get_volatile(cluster, indices[i], &p);
920 
921  quad->p[i][0] = .5*p->x; // undo fixed-point arith.
922  quad->p[i][1] = .5*p->y;
923  }
924 
925  res = 1;
926 
927  } else {
928  double lines[4][4];
929 
930  for (int i = 0; i < 4; i++) {
931  int i0 = indices[i];
932  int i1 = indices[(i+1)&3];
933 
934  if (0) {
935  // if there are enough points, skip the points near the corners
936  // (because those tend not to be very good.)
937  if (i1-i0 > 8) {
938  int t = (i1-i0)/6;
939  if (t < 0)
940  t = -t;
941 
942  i0 = (i0 + t) % sz;
943  i1 = (i1 + sz - t) % sz;
944  }
945  }
946 
947  double err;
948  fit_line(lfps, sz, i0, i1, lines[i], NULL, &err);
949 
950  if (err > td->qtp.max_line_fit_mse) {
951  res = 0;
952  goto finish;
953  }
954  }
955 
956  for (int i = 0; i < 4; i++) {
957  // solve for the intersection of lines (i) and (i+1)&3.
958  // p0 + lambda0*u0 = p1 + lambda1*u1, where u0 and u1
959  // are the line directions.
960  //
961  // lambda0*u0 - lambda1*u1 = (p1 - p0)
962  //
963  // rearrange (solve for lambdas)
964  //
965  // [u0_x -u1_x ] [lambda0] = [ p1_x - p0_x ]
966  // [u0_y -u1_y ] [lambda1] [ p1_y - p0_y ]
967  //
968  // remember that lines[i][0,1] = p, lines[i][2,3] = NORMAL vector.
969  // We want the unit vector, so we need the perpendiculars. Thus, below
970  // we have swapped the x and y components and flipped the y components.
971 
972  double A00 = lines[i][3], A01 = -lines[(i+1)&3][3];
973  double A10 = -lines[i][2], A11 = lines[(i+1)&3][2];
974  double B0 = -lines[i][0] + lines[(i+1)&3][0];
975  double B1 = -lines[i][1] + lines[(i+1)&3][1];
976 
977  double det = A00 * A11 - A10 * A01;
978 
979  // inverse.
980  double W00 = A11 / det, W01 = -A01 / det;
981  if (fabs(det) < 0.001) {
982  res = 0;
983  goto finish;
984  }
985 
986  // solve
987  double L0 = W00*B0 + W01*B1;
988 
989  // compute intersection
990  quad->p[i][0] = lines[i][0] + L0*A00;
991  quad->p[i][1] = lines[i][1] + L0*A10;
992 
993  if (0) {
994  // we should get the same intersection starting
995  // from point p1 and moving L1*u1.
996  double W10 = -A10 / det, W11 = A00 / det;
997  double L1 = W10*B0 + W11*B1;
998 
999  double x = lines[(i+1)&3][0] - L1*A10;
1000  double y = lines[(i+1)&3][1] - L1*A11;
1001  assert(fabs(x - quad->p[i][0]) < 0.001 &&
1002  fabs(y - quad->p[i][1]) < 0.001);
1003  }
1004 
1005  res = 1;
1006  }
1007  }
1008 
1009  // reject quads that are too small
1010  if (1) {
1011  double area = 0;
1012 
1013  // get area of triangle formed by points 0, 1, 2, 0
1014  double length[3], p;
1015  for (int i = 0; i < 3; i++) {
1016  int idxa = i; // 0, 1, 2,
1017  int idxb = (i+1) % 3; // 1, 2, 0
1018  length[i] = sqrt(sq(quad->p[idxb][0] - quad->p[idxa][0]) +
1019  sq(quad->p[idxb][1] - quad->p[idxa][1]));
1020  }
1021  p = (length[0] + length[1] + length[2]) / 2;
1022 
1023  area += sqrt(p*(p-length[0])*(p-length[1])*(p-length[2]));
1024 
1025  // get area of triangle formed by points 2, 3, 0, 2
1026  for (int i = 0; i < 3; i++) {
1027  int idxs[] = { 2, 3, 0, 2 };
1028  int idxa = idxs[i];
1029  int idxb = idxs[i+1];
1030  length[i] = sqrt(sq(quad->p[idxb][0] - quad->p[idxa][0]) +
1031  sq(quad->p[idxb][1] - quad->p[idxa][1]));
1032  }
1033  p = (length[0] + length[1] + length[2]) / 2;
1034 
1035  area += sqrt(p*(p-length[0])*(p-length[1])*(p-length[2]));
1036 
1037  // we don't actually know the family yet (quad detection is generic.)
1038  // This threshold is based on a 6x6 tag (which is actually 8x8)
1039 // int d = fam->d + fam->black_border*2;
1040  int d = 8;
1041  if (area < d*d) {
1042  res = 0;
1043  goto finish;
1044  }
1045  }
1046 
1047  // reject quads whose cumulative angle change isn't equal to 2PI
1048  if (1) {
1049  double total = 0;
1050 
1051  for (int i = 0; i < 4; i++) {
1052  int i0 = i, i1 = (i+1)&3, i2 = (i+2)&3;
1053 
1054  double theta0 = atan2f(quad->p[i0][1] - quad->p[i1][1],
1055  quad->p[i0][0] - quad->p[i1][0]);
1056  double theta1 = atan2f(quad->p[i2][1] - quad->p[i1][1],
1057  quad->p[i2][0] - quad->p[i1][0]);
1058 
1059  double dtheta = theta0 - theta1;
1060  if (dtheta < 0)
1061  dtheta += 2*M_PI;
1062 
1063  if (dtheta < td->qtp.critical_rad || dtheta > (M_PI - td->qtp.critical_rad))
1064  res = 0;
1065 
1066  total += dtheta;
1067  }
1068 
1069  // looking for 2PI
1070  if (total < 6.2 || total > 6.4) {
1071  res = 0;
1072  goto finish;
1073  }
1074  }
1075 
1076  // adjust pixel coordinates; all math up 'til now uses pixel
1077  // coordinates in which (0,0) is the lower left corner. But each
1078  // pixel actually spans from to [x, x+1), [y, y+1) the mean value of which
1079  // is +.5 higher than x & y.
1080 /* double delta = .5;
1081  for (int i = 0; i < 4; i++) {
1082  quad->p[i][0] += delta;
1083  quad->p[i][1] += delta;
1084  }
1085 */
1086  finish:
1087 
1088  free(lfps);
1089 
1090  return res;
1091 }
1092 
1093 #define DO_UNIONFIND(dx, dy) if (im->buf[y*s + dy*s + x + dx] == v) unionfind_connect(uf, y*w + x, y*w + dy*w + x + dx);
1094 
1095 static void do_unionfind_line(unionfind_t *uf, image_u8_t *im, int h, int w, int s, int y)
1096 {
1097  assert(y+1 < im->height);
1098 
1099  for (int x = 1; x < w - 1; x++) {
1100  uint8_t v = im->buf[y*s + x];
1101 
1102  if (v == 127)
1103  continue;
1104 
1105  // (dx,dy) pairs for 8 connectivity:
1106  // (REFERENCE) (1, 0)
1107  // (-1, 1) (0, 1) (1, 1)
1108  //
1109  DO_UNIONFIND(1, 0);
1110  DO_UNIONFIND(0, 1);
1111  if (v == 255) {
1112  DO_UNIONFIND(-1, 1);
1113  DO_UNIONFIND(1, 1);
1114  }
1115  }
1116 }
1117 #undef DO_UNIONFIND
1118 
1119 static void do_unionfind_task(void *p)
1120 {
1121  struct unionfind_task *task = (struct unionfind_task*) p;
1122 
1123  for (int y = task->y0; y < task->y1; y++) {
1124  do_unionfind_line(task->uf, task->im, task->h, task->w, task->s, y);
1125  }
1126 }
1127 
1128 static void do_quad_task(void *p)
1129 {
1130  struct quad_task *task = (struct quad_task*) p;
1131 
1132  zarray_t *clusters = task->clusters;
1133  zarray_t *quads = task->quads;
1134  apriltag_detector_t *td = task->td;
1135  int w = task->w, h = task->h;
1136 
1137  for (int cidx = task->cidx0; cidx < task->cidx1; cidx++) {
1138 
1139  zarray_t *cluster;
1140  zarray_get(clusters, cidx, &cluster);
1141 
1142  if (zarray_size(cluster) < td->qtp.min_cluster_pixels)
1143  continue;
1144 
1145  // a cluster should contain only boundary points around the
1146  // tag. it cannot be bigger than the whole screen. (Reject
1147  // large connected blobs that will be prohibitively slow to
1148  // fit quads to.) A typical point along an edge is added three
1149  // times (because it has 3 neighbors). The maximum perimeter
1150  // is 2w+2h.
1151  if (zarray_size(cluster) > 3*(2*w+2*h)) {
1152  continue;
1153  }
1154 
1155  struct quad quad;
1156  memset(&quad, 0, sizeof(struct quad));
1157 
1158  if (fit_quad(td, task->im, cluster, &quad)) {
1159  pthread_mutex_lock(&td->mutex);
1160 
1161  zarray_add(quads, &quad);
1162  pthread_mutex_unlock(&td->mutex);
1163  }
1164  }
1165 }
1166 
1168 {
1169  int w = im->width, h = im->height, s = im->stride;
1170  assert(w < 32768);
1171  assert(h < 32768);
1172 
1173  image_u8_t *threshim = image_u8_create_alignment(w, h, s);
1174  assert(threshim->stride == s);
1175 
1176  // The idea is to find the maximum and minimum values in a
1177  // window around each pixel. If it's a contrast-free region
1178  // (max-min is small), don't try to binarize. Otherwise,
1179  // threshold according to (max+min)/2.
1180  //
1181  // Mark low-contrast regions with value 127 so that we can skip
1182  // future work on these areas too.
1183 
1184  // however, computing max/min around every pixel is needlessly
1185  // expensive. We compute max/min for tiles. To avoid artifacts
1186  // that arise when high-contrast features appear near a tile
1187  // edge (and thus moving from one tile to another results in a
1188  // large change in max/min value), the max/min values used for
1189  // any pixel are computed from all 3x3 surrounding tiles. Thus,
1190  // the max/min sampling area for nearby pixels overlap by at least
1191  // one tile.
1192  //
1193  // The important thing is that the windows be large enough to
1194  // capture edge transitions; the tag does not need to fit into
1195  // a tile.
1196 
1197  // XXX Tunable. Generally, small tile sizes--- so long as they're
1198  // large enough to span a single tag edge--- seem to be a winner.
1199  const int tilesz = 4;
1200 
1201  // the last (possibly partial) tiles along each row and column will
1202  // just use the min/max value from the last full tile.
1203  int tw = w / tilesz;
1204  int th = h / tilesz;
1205 
1206  uint8_t *im_max = calloc(tw*th, sizeof(uint8_t));
1207  uint8_t *im_min = calloc(tw*th, sizeof(uint8_t));
1208 
1209  // first, collect min/max statistics for each tile
1210  for (int ty = 0; ty < th; ty++) {
1211  for (int tx = 0; tx < tw; tx++) {
1212  uint8_t max = 0, min = 255;
1213 
1214  for (int dy = 0; dy < tilesz; dy++) {
1215 
1216  for (int dx = 0; dx < tilesz; dx++) {
1217 
1218  uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx];
1219  if (v < min)
1220  min = v;
1221  if (v > max)
1222  max = v;
1223  }
1224  }
1225 
1226  im_max[ty*tw+tx] = max;
1227  im_min[ty*tw+tx] = min;
1228  }
1229  }
1230 
1231  // second, apply 3x3 max/min convolution to "blur" these values
1232  // over larger areas. This reduces artifacts due to abrupt changes
1233  // in the threshold value.
1234  if (1) {
1235  uint8_t *im_max_tmp = calloc(tw*th, sizeof(uint8_t));
1236  uint8_t *im_min_tmp = calloc(tw*th, sizeof(uint8_t));
1237 
1238  for (int ty = 0; ty < th; ty++) {
1239  for (int tx = 0; tx < tw; tx++) {
1240  uint8_t max = 0, min = 255;
1241 
1242  for (int dy = -1; dy <= 1; dy++) {
1243  if (ty+dy < 0 || ty+dy >= th)
1244  continue;
1245  for (int dx = -1; dx <= 1; dx++) {
1246  if (tx+dx < 0 || tx+dx >= tw)
1247  continue;
1248 
1249  uint8_t m = im_max[(ty+dy)*tw+tx+dx];
1250  if (m > max)
1251  max = m;
1252  m = im_min[(ty+dy)*tw+tx+dx];
1253  if (m < min)
1254  min = m;
1255  }
1256  }
1257 
1258  im_max_tmp[ty*tw + tx] = max;
1259  im_min_tmp[ty*tw + tx] = min;
1260  }
1261  }
1262  free(im_max);
1263  free(im_min);
1264  im_max = im_max_tmp;
1265  im_min = im_min_tmp;
1266  }
1267 
1268  for (int ty = 0; ty < th; ty++) {
1269  for (int tx = 0; tx < tw; tx++) {
1270 
1271  int min = im_min[ty*tw + tx];
1272  int max = im_max[ty*tw + tx];
1273 
1274  // low contrast region? (no edges)
1275  if (max - min < td->qtp.min_white_black_diff) {
1276  for (int dy = 0; dy < tilesz; dy++) {
1277  int y = ty*tilesz + dy;
1278 
1279  for (int dx = 0; dx < tilesz; dx++) {
1280  int x = tx*tilesz + dx;
1281 
1282  threshim->buf[y*s+x] = 127;
1283  }
1284  }
1285  continue;
1286  }
1287 
1288  // otherwise, actually threshold this tile.
1289 
1290  // argument for biasing towards dark; specular highlights
1291  // can be substantially brighter than white tag parts
1292  uint8_t thresh = min + (max - min) / 2;
1293 
1294  for (int dy = 0; dy < tilesz; dy++) {
1295  int y = ty*tilesz + dy;
1296 
1297  for (int dx = 0; dx < tilesz; dx++) {
1298  int x = tx*tilesz + dx;
1299 
1300  uint8_t v = im->buf[y*s+x];
1301  if (v > thresh)
1302  threshim->buf[y*s+x] = 255;
1303  else
1304  threshim->buf[y*s+x] = 0;
1305  }
1306  }
1307  }
1308  }
1309 
1310  // we skipped over the non-full-sized tiles above. Fix those now.
1311  if (1) {
1312  for (int y = 0; y < h; y++) {
1313 
1314  // what is the first x coordinate we need to process in this row?
1315 
1316  int x0;
1317 
1318  if (y >= th*tilesz) {
1319  x0 = 0; // we're at the bottom; do the whole row.
1320  } else {
1321  x0 = tw*tilesz; // we only need to do the right most part.
1322  }
1323 
1324  // compute tile coordinates and clamp.
1325  int ty = y / tilesz;
1326  if (ty >= th)
1327  ty = th - 1;
1328 
1329  for (int x = x0; x < w; x++) {
1330  int tx = x / tilesz;
1331  if (tx >= tw)
1332  tx = tw - 1;
1333 
1334  int max = im_max[ty*tw + tx];
1335  int min = im_min[ty*tw + tx];
1336  int thresh = min + (max - min) / 2;
1337 
1338  uint8_t v = im->buf[y*s+x];
1339  if (v > thresh)
1340  threshim->buf[y*s+x] = 255;
1341  else
1342  threshim->buf[y*s+x] = 0;
1343  }
1344  }
1345  }
1346 
1347  free(im_min);
1348  free(im_max);
1349 
1350  // this is a dilate/erode deglitching scheme that does not improve
1351  // anything as far as I can tell.
1352  if (0 || td->qtp.deglitch) {
1353  image_u8_t *tmp = image_u8_create(w, h);
1354 
1355  for (int y = 1; y + 1 < h; y++) {
1356  for (int x = 1; x + 1 < w; x++) {
1357  uint8_t max = 0;
1358  for (int dy = -1; dy <= 1; dy++) {
1359  for (int dx = -1; dx <= 1; dx++) {
1360  uint8_t v = threshim->buf[(y+dy)*s + x + dx];
1361  if (v > max)
1362  max = v;
1363  }
1364  }
1365  tmp->buf[y*s+x] = max;
1366  }
1367  }
1368 
1369  for (int y = 1; y + 1 < h; y++) {
1370  for (int x = 1; x + 1 < w; x++) {
1371  uint8_t min = 255;
1372  for (int dy = -1; dy <= 1; dy++) {
1373  for (int dx = -1; dx <= 1; dx++) {
1374  uint8_t v = tmp->buf[(y+dy)*s + x + dx];
1375  if (v < min)
1376  min = v;
1377  }
1378  }
1379  threshim->buf[y*s+x] = min;
1380  }
1381  }
1382 
1383  image_u8_destroy(tmp);
1384  }
1385 
1386  timeprofile_stamp(td->tp, "threshold");
1387 
1388  return threshim;
1389 }
1390 
1391 // basically the same as threshold(), but assumes the input image is a
1392 // bayer image. It collects statistics separately for each 2x2 block
1393 // of pixels. NOT WELL TESTED.
1395 {
1396  int w = im->width, h = im->height, s = im->stride;
1397 
1398  image_u8_t *threshim = image_u8_create_alignment(w, h, s);
1399  assert(threshim->stride == s);
1400 
1401  int tilesz = 32;
1402  assert((tilesz & 1) == 0); // must be multiple of 2
1403 
1404  int tw = w/tilesz + 1;
1405  int th = h/tilesz + 1;
1406 
1407  uint8_t *im_max[4], *im_min[4];
1408  for (int i = 0; i < 4; i++) {
1409  im_max[i] = calloc(tw*th, sizeof(uint8_t));
1410  im_min[i] = calloc(tw*th, sizeof(uint8_t));
1411  }
1412 
1413  for (int ty = 0; ty < th; ty++) {
1414  for (int tx = 0; tx < tw; tx++) {
1415 
1416  uint8_t max[4] = { 0, 0, 0, 0};
1417  uint8_t min[4] = { 255, 255, 255, 255 };
1418 
1419  for (int dy = 0; dy < tilesz; dy++) {
1420  if (ty*tilesz+dy >= h)
1421  continue;
1422 
1423  for (int dx = 0; dx < tilesz; dx++) {
1424  if (tx*tilesz+dx >= w)
1425  continue;
1426 
1427  // which bayer element is this pixel?
1428  int idx = (2*(dy&1) + (dx&1));
1429 
1430  uint8_t v = im->buf[(ty*tilesz+dy)*s + tx*tilesz + dx];
1431  if (v < min[idx])
1432  min[idx] = v;
1433  if (v > max[idx])
1434  max[idx] = v;
1435  }
1436  }
1437 
1438  for (int i = 0; i < 4; i++) {
1439  im_max[i][ty*tw+tx] = max[i];
1440  im_min[i][ty*tw+tx] = min[i];
1441  }
1442  }
1443  }
1444 
1445  for (int ty = 0; ty < th; ty++) {
1446  for (int tx = 0; tx < tw; tx++) {
1447 
1448  uint8_t max[4] = { 0, 0, 0, 0};
1449  uint8_t min[4] = { 255, 255, 255, 255 };
1450 
1451  for (int dy = -1; dy <= 1; dy++) {
1452  if (ty+dy < 0 || ty+dy >= th)
1453  continue;
1454  for (int dx = -1; dx <= 1; dx++) {
1455  if (tx+dx < 0 || tx+dx >= tw)
1456  continue;
1457 
1458  for (int i = 0; i < 4; i++) {
1459  uint8_t m = im_max[i][(ty+dy)*tw+tx+dx];
1460  if (m > max[i])
1461  max[i] = m;
1462  m = im_min[i][(ty+dy)*tw+tx+dx];
1463  if (m < min[i])
1464  min[i] = m;
1465  }
1466  }
1467  }
1468 
1469  // XXX CONSTANT
1470 // if (max - min < 30)
1471 // continue;
1472 
1473  // argument for biasing towards dark: specular highlights
1474  // can be substantially brighter than white tag parts
1475  uint8_t thresh[4];
1476  for (int i = 0; i < 4; i++) {
1477  thresh[i] = min[i] + (max[i] - min[i]) / 2;
1478  }
1479 
1480  for (int dy = 0; dy < tilesz; dy++) {
1481  int y = ty*tilesz + dy;
1482  if (y >= h)
1483  continue;
1484 
1485  for (int dx = 0; dx < tilesz; dx++) {
1486  int x = tx*tilesz + dx;
1487  if (x >= w)
1488  continue;
1489 
1490  // which bayer element is this pixel?
1491  int idx = (2*(y&1) + (x&1));
1492 
1493  uint8_t v = im->buf[y*s+x];
1494  threshim->buf[y*s+x] = v > thresh[idx];
1495  }
1496  }
1497  }
1498  }
1499 
1500  for (int i = 0; i < 4; i++) {
1501  free(im_min[i]);
1502  free(im_max[i]);
1503  }
1504 
1505  timeprofile_stamp(td->tp, "threshold");
1506 
1507  return threshim;
1508 }
1509 
1511 {
1513  // step 1. threshold the image, creating the edge image.
1514 
1515  int w = im->width, h = im->height;
1516 
1517  image_u8_t *threshim = threshold(td, im);
1518  int ts = threshim->stride;
1519 
1520  if (td->debug)
1521  image_u8_write_pnm(threshim, "debug_threshold.pnm");
1522 
1524  // step 2. find connected components.
1525 
1526  unionfind_t *uf = unionfind_create(w * h);
1527 
1528  if (td->nthreads <= 1) {
1529  for (int y = 0; y < h - 1; y++) {
1530  do_unionfind_line(uf, threshim, h, w, ts, y);
1531  }
1532  } else {
1533  int sz = h - 1;
1534  int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
1535  struct unionfind_task tasks[sz / chunksize + 1];
1536 
1537  int ntasks = 0;
1538 
1539  for (int i = 0; i < sz; i += chunksize) {
1540  // each task will process [y0, y1). Note that this attaches
1541  // each cell to the right and down, so row y1 *is* potentially modified.
1542  //
1543  // for parallelization, make sure that each task doesn't touch rows
1544  // used by another thread.
1545  tasks[ntasks].y0 = i;
1546  tasks[ntasks].y1 = imin(sz, i + chunksize - 1);
1547  tasks[ntasks].h = h;
1548  tasks[ntasks].w = w;
1549  tasks[ntasks].s = ts;
1550  tasks[ntasks].uf = uf;
1551  tasks[ntasks].im = threshim;
1552 
1553  workerpool_add_task(td->wp, do_unionfind_task, &tasks[ntasks]);
1554  ntasks++;
1555  }
1556 
1557  workerpool_run(td->wp);
1558 
1559  // XXX stitch together the different chunks.
1560  for (int i = 0; i + 1 < ntasks; i++) {
1561  do_unionfind_line(uf, threshim, h, w, ts, tasks[i].y1);
1562  }
1563  }
1564 
1565  timeprofile_stamp(td->tp, "unionfind");
1566 
1567  // XXX sizing??
1568  int nclustermap = 2*w*h - 1;
1569 
1570  struct uint64_zarray_entry **clustermap = calloc(nclustermap, sizeof(struct uint64_zarray_entry*));
1571 
1572  for (int y = 1; y < h-1; y++) {
1573  for (int x = 1; x < w-1; x++) {
1574 
1575  uint8_t v0 = threshim->buf[y*ts + x];
1576  if (v0 == 127)
1577  continue;
1578 
1579  // XXX don't query this until we know we need it?
1580  uint64_t rep0 = unionfind_get_representative(uf, y*w + x);
1581 
1582  // whenever we find two adjacent pixels such that one is
1583  // white and the other black, we add the point half-way
1584  // between them to a cluster associated with the unique
1585  // ids of the white and black regions.
1586  //
1587  // We additionally compute the gradient direction (i.e., which
1588  // direction was the white pixel?) Note: if (v1-v0) == 255, then
1589  // (dx,dy) points towards the white pixel. if (v1-v0) == -255, then
1590  // (dx,dy) points towards the black pixel. p.gx and p.gy will thus
1591  // be -255, 0, or 255.
1592  //
1593  // Note that any given pixel might be added to multiple
1594  // different clusters. But in the common case, a given
1595  // pixel will be added multiple times to the same cluster,
1596  // which increases the size of the cluster and thus the
1597  // computational costs.
1598  //
1599  // A possible optimization would be to combine entries
1600  // within the same cluster.
1601 
1602 #define DO_CONN(dx, dy) \
1603  if (1) { \
1604  uint8_t v1 = threshim->buf[y*ts + dy*ts + x + dx]; \
1605  \
1606  if (v0 + v1 == 255) { \
1607  uint64_t rep1 = unionfind_get_representative(uf, y*w + dy*w + x + dx); \
1608  uint64_t clusterid; \
1609  if (rep0 < rep1) \
1610  clusterid = (rep1 << 32) + rep0; \
1611  else \
1612  clusterid = (rep0 << 32) + rep1; \
1613  \
1614  /* XXX lousy hash function */ \
1615  uint32_t clustermap_bucket = u64hash_2(clusterid) % nclustermap; \
1616  struct uint64_zarray_entry *entry = clustermap[clustermap_bucket]; \
1617  while (entry && entry->id != clusterid) { \
1618  entry = entry->next; \
1619  } \
1620  \
1621  if (!entry) { \
1622  entry = calloc(1, sizeof(struct uint64_zarray_entry)); \
1623  entry->id = clusterid; \
1624  entry->cluster = zarray_create(sizeof(struct pt)); \
1625  entry->next = clustermap[clustermap_bucket]; \
1626  clustermap[clustermap_bucket] = entry; \
1627  } \
1628  \
1629  struct pt p = { .x = 2*x + dx, .y = 2*y + dy, .gx = dx*((int) v1-v0), .gy = dy*((int) v1-v0)}; \
1630  zarray_add(entry->cluster, &p); \
1631  } \
1632  }
1633 
1634  // do 4 connectivity. NB: Arguments must be [-1, 1] or we'll overflow .gx, .gy
1635  DO_CONN(1, 0);
1636  DO_CONN(0, 1);
1637 
1638  // do 8 connectivity
1639  DO_CONN(-1, 1);
1640  DO_CONN(1, 1);
1641  }
1642  }
1643 #undef DO_CONN
1644 
1645  image_u8_destroy(threshim);
1646 
1647  // make segmentation image.
1648  if (td->debug) {
1649  image_u8x3_t *d = image_u8x3_create(w, h);
1650 
1651  uint32_t *colors = (uint32_t*) calloc(w*h, sizeof(*colors));
1652 
1653  for (int y = 0; y < h; y++) {
1654  for (int x = 0; x < w; x++) {
1655  uint32_t v = unionfind_get_representative(uf, y*w+x);
1656 
1658  continue;
1659 
1660  uint32_t color = colors[v];
1661  uint8_t r = color >> 16,
1662  g = color >> 8,
1663  b = color;
1664 
1665  if (color == 0) {
1666  const int bias = 50;
1667  r = bias + (random() % (200-bias));
1668  g = bias + (random() % (200-bias));
1669  b = bias + (random() % (200-bias));
1670  colors[v] = (r << 16) | (g << 8) | b;
1671  }
1672 
1673  d->buf[y*d->stride + 3*x + 0] = r;
1674  d->buf[y*d->stride + 3*x + 1] = g;
1675  d->buf[y*d->stride + 3*x + 2] = b;
1676  }
1677  }
1678 
1679  free(colors);
1680 
1681  image_u8x3_write_pnm(d, "debug_segmentation.pnm");
1682  image_u8x3_destroy(d);
1683  }
1684 
1685  timeprofile_stamp(td->tp, "make clusters");
1686 
1688  // step 3. process each connected component.
1689  zarray_t *clusters = zarray_create(sizeof(zarray_t*)); //, uint64_zarray_hash_size(clustermap));
1690  if (1) {
1691  for (int i = 0; i < nclustermap; i++) {
1692 
1693  for (struct uint64_zarray_entry *entry = clustermap[i]; entry; entry = entry->next) {
1694  // XXX reject clusters here?
1695  zarray_add(clusters, &entry->cluster);
1696  }
1697  }
1698  }
1699 
1700 
1701  if (td->debug) {
1702  image_u8x3_t *d = image_u8x3_create(w, h);
1703 
1704  for (int i = 0; i < zarray_size(clusters); i++) {
1705  zarray_t *cluster;
1706  zarray_get(clusters, i, &cluster);
1707 
1708  uint32_t r, g, b;
1709 
1710  if (1) {
1711  const int bias = 50;
1712  r = bias + (random() % (200-bias));
1713  g = bias + (random() % (200-bias));
1714  b = bias + (random() % (200-bias));
1715  }
1716 
1717  for (int j = 0; j < zarray_size(cluster); j++) {
1718  struct pt *p;
1719  zarray_get_volatile(cluster, j, &p);
1720 
1721  int x = p->x / 2;
1722  int y = p->y / 2;
1723  d->buf[y*d->stride + 3*x + 0] = r;
1724  d->buf[y*d->stride + 3*x + 1] = g;
1725  d->buf[y*d->stride + 3*x + 2] = b;
1726  }
1727  }
1728 
1729  image_u8x3_write_pnm(d, "debug_clusters.pnm");
1730  image_u8x3_destroy(d);
1731  }
1732 
1733  if (1) {
1734  for (int i = 0; i < nclustermap; i++) {
1735  struct uint64_zarray_entry *entry = clustermap[i];
1736  while (entry) {
1737  struct uint64_zarray_entry *tmp = entry->next;
1738  free(entry);
1739  entry = tmp;
1740  }
1741  }
1742  free(clustermap);
1743  }
1744 
1745  zarray_t *quads = zarray_create(sizeof(struct quad));
1746 
1747  int sz = zarray_size(clusters);
1748  int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
1749  struct quad_task tasks[sz / chunksize + 1];
1750 
1751  int ntasks = 0;
1752  for (int i = 0; i < sz; i += chunksize) {
1753  tasks[ntasks].td = td;
1754  tasks[ntasks].cidx0 = i;
1755  tasks[ntasks].cidx1 = imin(sz, i + chunksize);
1756  tasks[ntasks].h = h;
1757  tasks[ntasks].w = w;
1758  tasks[ntasks].quads = quads;
1759  tasks[ntasks].clusters = clusters;
1760  tasks[ntasks].im = im;
1761 
1762  workerpool_add_task(td->wp, do_quad_task, &tasks[ntasks]);
1763  ntasks++;
1764  }
1765 
1766  workerpool_run(td->wp);
1767 
1768  timeprofile_stamp(td->tp, "fit quads to clusters");
1769 
1770  if (td->debug) {
1771  FILE *f = fopen("debug_lines.ps", "w");
1772  fprintf(f, "%%!PS\n\n");
1773 
1774  image_u8_t *im2 = image_u8_copy(im);
1775  image_u8_darken(im2);
1776  image_u8_darken(im2);
1777 
1778  // assume letter, which is 612x792 points.
1779  double scale = fmin(612.0/im->width, 792.0/im2->height);
1780  fprintf(f, "%.15f %.15f scale\n", scale, scale);
1781  fprintf(f, "0 %d translate\n", im2->height);
1782  fprintf(f, "1 -1 scale\n");
1783 
1784  postscript_image(f, im);
1785 
1786  for (int i = 0; i < zarray_size(quads); i++) {
1787  struct quad *q;
1788  zarray_get_volatile(quads, i, &q);
1789 
1790  float rgb[3];
1791  int bias = 100;
1792 
1793  for (int i = 0; i < 3; i++)
1794  rgb[i] = bias + (random() % (255-bias));
1795 
1796  fprintf(f, "%f %f %f setrgbcolor\n", rgb[0]/255.0f, rgb[1]/255.0f, rgb[2]/255.0f);
1797  fprintf(f, "%.15f %.15f moveto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto %.15f %.15f lineto stroke\n",
1798  q->p[0][0], q->p[0][1],
1799  q->p[1][0], q->p[1][1],
1800  q->p[2][0], q->p[2][1],
1801  q->p[3][0], q->p[3][1],
1802  q->p[0][0], q->p[0][1]);
1803  }
1804 
1805  fclose(f);
1806  }
1807 
1808  // printf(" %d %d %d %d\n", indices[0], indices[1], indices[2], indices[3]);
1809 
1810  unionfind_destroy(uf);
1811 
1812  for (int i = 0; i < zarray_size(clusters); i++) {
1813  zarray_t *cluster;
1814  zarray_get(clusters, i, &cluster);
1815  zarray_destroy(cluster);
1816  }
1817 
1818  zarray_destroy(clusters);
1819 
1820 
1821  return quads;
1822 }
static unionfind_t * unionfind_create(uint32_t maxid)
Definition: unionfind.h:58
#define APRILTAG_TASKS_PER_THREAD_TARGET
Definition: apriltag.h:49
image_u8_t * im
static void zarray_get_volatile(const zarray_t *za, int idx, void *p)
Definition: zarray.h:218
int quad_segment_agg(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
static void timeprofile_stamp(timeprofile_t *tp, const char *name)
Definition: timeprofile.h:83
zarray_t * clusters
pthread_mutex_t mutex
Definition: apriltag.h:200
static int zarray_size(const zarray_t *za)
Definition: zarray.h:136
static uint32_t unionfind_get_representative(unionfind_t *uf, uint32_t id)
Definition: unionfind.h:95
static void zarray_destroy(zarray_t *za)
Definition: zarray.h:76
static void zarray_set(zarray_t *za, int idx, const void *p, void *outp)
Definition: zarray.h:345
apriltag_detector_t * td
zarray_t * quads
int image_u8_write_pnm(const image_u8_t *im, const char *path)
Definition: image_u8.c:205
image_u8_t * threshold_bayer(apriltag_detector_t *td, image_u8_t *im)
image_u8_t * image_u8_create(unsigned int width, unsigned int height)
Definition: image_u8.c:59
static int imax(int a, int b)
Definition: math_util.h:165
static void ptsort(struct pt *pts, int sz)
#define MAYBE_SWAP(arr, apos, bpos)
struct uint64_zarray_entry * next
timeprofile_t * tp
Definition: apriltag.h:182
void zmaxheap_destroy(zmaxheap_t *heap)
Definition: zmaxheap.c:105
const int32_t height
Definition: image_types.h:14
static uint32_t unionfind_get_set_size(unionfind_t *uf, uint32_t id)
Definition: unionfind.h:118
static zarray_t * zarray_create(size_t el_sz)
Definition: zarray.h:63
const int32_t width
Definition: image_types.h:13
float p[4][2]
Definition: apriltag.h:53
int16_t gy
char * data
Definition: zarray.h:55
void zmaxheap_add(zmaxheap_t *heap, void *p, float v)
Definition: zmaxheap.c:139
uint8_t * buf
Definition: image_types.h:27
#define MERGE(apos, bpos)
const int32_t stride
Definition: image_types.h:15
struct apriltag_quad_thresh_params qtp
Definition: apriltag.h:178
zarray_t * apriltag_quad_thresh(apriltag_detector_t *td, image_u8_t *im)
void workerpool_add_task(workerpool_t *wp, void(*f)(void *p), void *p)
Definition: workerpool.c:162
#define DO_CONN(dx, dy)
int16_t gx
int fit_quad(apriltag_detector_t *td, image_u8_t *im, zarray_t *cluster, struct quad *quad)
image_u8_t * image_u8_copy(const image_u8_t *in)
Definition: image_u8.c:74
int zmaxheap_remove_max(zmaxheap_t *heap, void *p, float *v)
Definition: zmaxheap.c:248
uint16_t x
image_u8_t * image_u8_create_alignment(unsigned int width, unsigned int height, unsigned int alignment)
Definition: image_u8.c:64
static double sq(double v)
Definition: math_util.h:84
static void do_unionfind_task(void *p)
int pt_compare_theta(const void *_a, const void *_b)
static void do_quad_task(void *p)
static void unionfind_destroy(unionfind_t *uf)
Definition: unionfind.h:70
const int32_t stride
Definition: image_types.h:25
static int imin(int a, int b)
Definition: math_util.h:160
static void do_unionfind_line(unionfind_t *uf, image_u8_t *im, int h, int w, int s, int y)
static void zarray_truncate(zarray_t *za, int sz)
Definition: zarray.h:228
uint16_t y
uint8_t * buf
Definition: image_types.h:17
#define DO_UNIONFIND(dx, dy)
image_u8x3_t * image_u8x3_create(unsigned int width, unsigned int height)
Definition: image_u8x3.c:37
static void zarray_get(const zarray_t *za, int idx, void *p)
Definition: zarray.h:201
void workerpool_run(workerpool_t *wp)
Definition: workerpool.c:183
Definition: zarray.h:49
#define min(A, B)
Definition: math_util.h:59
int size
Definition: zarray.h:53
int quad_segment_maxima(apriltag_detector_t *td, zarray_t *cluster, struct line_fit_pt *lfps, int indices[4])
#define M_PI
void image_u8x3_destroy(image_u8x3_t *im)
Definition: image_u8x3.c:72
workerpool_t * wp
Definition: apriltag.h:197
Definition: apriltag.h:51
void image_u8_destroy(image_u8_t *im)
Definition: image_u8.c:87
int err_compare_descending(const void *_a, const void *_b)
image_u8_t * threshold(apriltag_detector_t *td, image_u8_t *im)
void fit_line(struct line_fit_pt *lfps, int sz, int i0, int i1, double *lineparm, double *err, double *mse)
int image_u8x3_write_pnm(const image_u8x3_t *im, const char *path)
Definition: image_u8x3.c:132
static void postscript_image(FILE *f, image_u8_t *im)
uint64_t id
void image_u8_darken(image_u8_t *im)
Definition: image_u8.c:293
static uint32_t u64hash_2(uint64_t x)
zarray_t * cluster
float theta
#define ASSOC
zmaxheap_t * zmaxheap_create(size_t el_sz)
Definition: zmaxheap.c:92
#define max(A, B)
Definition: math_util.h:58
static void zarray_add(zarray_t *za, const void *p)
Definition: zarray.h:185


apriltags2
Author(s): Danylo Malyuta
autogenerated on Fri Oct 19 2018 04:02:32