RobustStats.cpp
Go to the documentation of this file.
1 //==============================================================================
2 //
3 // This file is part of GNSSTk, the ARL:UT GNSS Toolkit.
4 //
5 // The GNSSTk is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published
7 // by the Free Software Foundation; either version 3.0 of the License, or
8 // any later version.
9 //
10 // The GNSSTk is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with GNSSTk; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 // This software was developed by Applied Research Laboratories at the
20 // University of Texas at Austin.
21 // Copyright 2004-2022, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24 
25 //==============================================================================
26 //
27 // This software was developed by Applied Research Laboratories at the
28 // University of Texas at Austin, under contract to an agency or agencies
29 // within the U.S. Department of Defense. The U.S. Government retains all
30 // rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 // Pursuant to DoD Directive 523024
33 //
34 // DISTRIBUTION STATEMENT A: This software has been approved for public
35 // release, distribution is unlimited.
36 //
37 //==============================================================================
38 
47 //------------------------------------------------------------------------------------
48 // GNSSTk includes
49 #include "RobustStats.hpp"
50 #include "Exception.hpp"
51 #include "Matrix.hpp"
52 #include "SRI.hpp"
53 #include "StringUtils.hpp"
54 
55 //------------------------------------------------------------------------------------
56 // moved to RobustStats.hpp as macros
57 // const double gnsstk::Robust::TuningT=1.5; // or 1.345; // or 1.5
58 // const double gnsstk::Robust::TuningA=0.778; // or 0.67; // or 0.778
59 // const double gnsstk::Robust::TuningE=0.6745;
60 
61 //------------------------------------------------------------------------------------
62 using namespace std;
63 using namespace gnsstk;
64 using namespace StringUtils;
65 
66 //------------------------------------------------------------------------------------
67 inline long Stem(double x, double& scale) { return (long(x / scale)); }
68 
69 //------------------------------------------------------------------------------------
70 void Robust::StemLeafPlot(std::ostream& os, double *xd, long nd,
71  const std::string& msg)
72 {
73  long stem, l, nout = 0, s, sM, sQ1, sQ3, sOH, sOL;
74  int i, sign, pos, k, leaf;
75  unsigned len = 0, kk;
76  char c;
77  double x, scale;
78  string buf;
79 
80  if (!xd || nd < 2)
81  {
82  GNSSTK_THROW(Exception("Invalid input"));
83  }
84 
85  // find range
86  double range = xd[nd - 1] - xd[0]; // max - min
87  if (range < 0.0)
88  {
89  GNSSTK_THROW(Exception("Array is not sorted"));
90  }
91  if (range == 0.0)
92  {
93  range = xd[0]; // GNSSTK_THROW(Exception("Array has zero range"));
94  }
95 
96  // find scale
97  scale = 0.0;
98  short nscale = 0; // scale = 1*10^nscale
99  if (range >= 10.0)
100  {
101  sign = 1;
102  }
103  else if (range < 1.0)
104  {
105  sign = -1;
106  }
107  else
108  {
109  scale = 1.0;
110  }
111 
112  if (!scale)
113  {
114  do
115  {
116  nscale += sign;
117  } while (range * ::pow(10.0, -nscale) < 1.0 ||
118  range * ::pow(10.0, -nscale) >= 10.0);
119  }
120 
121  scale = ::pow(10.0, nscale);
122 
123  double M = Robust::Median(xd, nd);
124  double Q1, Q3;
125  Robust::Quartiles(xd, nd, Q1, Q3);
126  // outlier limits
127  double OH = 2.5 * Q3 - 1.5 * Q1; // outlier high limit
128  double OL = 2.5 * Q1 - 1.5 * Q3; // outlier low limit ('oh L' not 'zero L')
129 
130  // starting stem=stem(min=xd[0]), and stem step==scale
131  i = 1 + short((range / scale) + 0.5); // number of stems
132  if (xd[0] * xd[nd - 1] < 0.0)
133  {
134  i++; // add one stem for zero
135  }
136  if (nd > 8 && i < 8 && xd[nd - 1] != xd[0])
137  { // fudge so #stems is big enough
138  scale /= 10.0;
139  nscale--;
140  }
141 
142  // find length of stem for printing
143  // must look through all data b/c leaf==10 can bump up the stem
144  len = 0;
145  for (l = 0; l < nd; l++)
146  {
147  // this code is duplicated below in the main loop
148  sign = 1;
149  if (xd[l] < 0.0)
150  {
151  sign = -1;
152  }
153  stem = Stem(fabs(xd[l]), scale);
154  x = 10 * fabs(xd[l] / scale - sign * stem);
155  leaf = short(x + 0.5);
156  if (leaf == 10)
157  {
158  stem++;
159  leaf = 0;
160  }
161  stem = sign * stem;
162  buf = asString<long>(stem);
163  if (len < buf.size())
164  {
165  len = buf.size();
166  }
167  }
168 
169  // loop through data, adding stems and leaves to plot
170  bool start = true;
171  if (xd[0] < 0.0)
172  {
173  pos = -1;
174  }
175  else
176  {
177  pos = 1;
178  }
179  sM = Stem(M, scale);
180  sQ1 = Stem(Q1, scale);
181  sQ3 = Stem(Q3, scale);
182  sOH = Stem(OH, scale);
183  sOL = Stem(OL, scale);
184  for (l = 0; l < nd; l++)
185  {
186  // current: stem=s,pos; data=stem,sign(xd[l])
187  if (xd[l] > OH || xd[l] < OL)
188  {
189  nout++; // count outliers
190  }
191  sign = 1;
192  if (xd[l] < 0.0)
193  {
194  sign = -1;
195  }
196  stem = Stem(fabs(xd[l]), scale);
197  x = 10 * fabs(xd[l] / scale - sign * stem);
198  leaf = short(x + 0.5);
199  if (leaf == 10)
200  {
201  stem++;
202  leaf = 0;
203  }
204  stem = sign * stem;
205 
206  // print it
207  if (start || s != stem || (s == 0 && pos * sign < 0.0))
208  {
209  // Change of stem -> print
210  if (start)
211  { // first time through
212  os << "Stem and Leaf Plot (scale "; // print scale
213  if (nscale < -8 || nscale > 8)
214  {
215  os << "1.0e" << nscale;
216  }
217  else
218  {
219  i = nscale;
220  if (nscale < 0)
221  {
222  os << "0.";
223  i++;
224  k = 1;
225  }
226  else
227  {
228  os << "1";
229  k = -1;
230  }
231  while (i != 0)
232  {
233  os << "0";
234  i += k;
235  }
236  if (nscale < 0)
237  {
238  os << "1";
239  }
240  else
241  {
242  os << ".0";
243  }
244  }
245  os << ", " << nd << "pts) : "; // print npts
246  if (msg.size() > 0)
247  {
248  os << msg; // and message
249  }
250  s = stem - 1; // save for later
251  start = false;
252  }
253 
254  while (s < stem || (s == 0 && pos * sign < 0))
255  { // also print stems without leaves
256  if (s != 0)
257  {
258  s++;
259  }
260  else if (pos < 0)
261  {
262  pos = 1;
263  }
264  else
265  {
266  s++;
267  }
268 
269  // print the new line with stem s
270  os << "\n";
271  buf = asString<long>(s < 0 ? -s : s); // abs(stem)
272 
273  if (s < 0)
274  {
275  c = '-'; // sign of stem
276  }
277  else if (s > 0)
278  {
279  c = '+';
280  }
281  else if (pos > 0)
282  {
283  c = '+';
284  }
285  else
286  {
287  c = '-';
288  }
289  os << c;
290 
291  for (kk = buf.size(); kk < len; kk++)
292  {
293  os << " "; // pad out to length
294  }
295  os << buf << " "; // stem/axis space
296 
297  // now print either |, M (median), Q (quartiles), or >< (outliers)
298  k = 0;
299 
300  // if s==sM
301  if (s == sM && (s != 0 || pos * M > 0.0))
302  {
303  os << "M"; // marks the median
304  k++;
305  }
306 
307  if ((s == sQ3 && (s != 0 || pos * Q3 < 0.0)) ||
308  (s == sQ1 && (s != 0 || pos * Q1 < 0.0)))
309  {
310  os << "Q"; // marks a quartile
311  k++;
312  }
313 
314  if ((s < sOL || (s == 0 && sOL == 0 && (pos == -1 && OL > 0.0))) ||
315  (s == sOL && (s != 0 || pos * OL > 0.0)))
316  {
317  os << "<"; // marks an outlier (small)
318  k++;
319  }
320  else if ((s > sOH ||
321  (s == 0 && sOH == 0 && (pos == 1 && OH < 0.0))) ||
322  (s == sOH && (s != 0 || pos * OH > 0.0)))
323  {
324  os << ">"; // marks an outlier (big)
325  k++;
326  }
327 
328  if (k == 0)
329  {
330  os << "|"; // marks a regular point
331  k++;
332  }
333 
334  while (k < 3)
335  {
336  os << " ";
337  k++;
338  }
339  }
340  } // end change stem
341 
342  // print leaf
343  os << leaf;
344  }
345 
346  os << "\nEND Stem and Leaf Plot (there are " << nout << " outliers.)\n";
347 
348 } // end StemLeafPlot
349 
350 void Robust::Quantiles(double *xd, long nd)
351 {
352  if (!xd || nd < 2)
353  {
354  Exception e("Invalid input");
355  GNSSTK_THROW(e);
356  }
357 
358  double f;
359  for (int i = 0; i < nd; i++)
360  {
361  f = double(8 * i + 5) / double(8 * nd + 2); // f(i) = i-3/8 / n+1/4, i=1,n
362  xd[i] = 4.91 * (::pow(f, 0.14) - ::pow(1 - f, 0.14));
363  }
364 
365 } // end Quantiles
366 
367 #define SEQUENTIAL 1 // don't see much difference in timing...
368 int Robust::RobustPolyFit(double *xd, const double *td, int nd, int N,
369  double *c, double *w)
370 {
371  try
372  {
373  if (!xd || !td || !c || nd < 2)
374  {
375  Exception e("Invalid input");
376  GNSSTK_THROW(e);
377  }
378 
379  const int maxiter = 50;
380  const double conv_limit = ::sqrt(double(nd)) * 1.e-3;
381  int i, j, k, niter;
382  double x0 = xd[0], t0 = td[0], mad, median, conv;
383 #ifdef SEQUENTIAL
384  double fit, dt;
385  Matrix<double> R, A(1, N + 1), invR;
386  Vector<double> Z;
387 #else
388  Matrix<double> PT, P(nd, N, 1.0);
389  Vector<double> D(nd);
390 #endif
391  Matrix<double> Cov;
392  Vector<double> Wts(nd, 1.0), Coeff(N, 0.0), Res(nd), ResCopy(nd);
393 
394 #ifndef SEQUENTIAL
395  // build the data vector and the (constant) partials matrix
396  for (i = 0; i < nd; i++)
397  {
398  D(i) = xd[i] - x0;
399  for (j = 1; j < N; j++)
400  P(i, j) = P(i, j - 1) * (td[i] - t0);
401  }
402 #endif
403 
404  // iterate until weights don't change
405  niter = 0;
406  while (1)
407  {
408 #ifdef SEQUENTIAL
409  R = Matrix<double>(N, N, 0.0);
410  Z = Vector<double>(N, 0.0);
411  // loop over the data
412  for (i = 0; i < nd; i++)
413  {
414  // if(Wts(i) < 1.e-8) continue; // ignore if weight is
415  // very small
416  A(0, N) = (xd[i] - x0) * Wts(i); // data
417  dt = td[i] - t0;
418  A(0, 0) = 1.0 * Wts(i); // partials
419  for (j = 1; j < N; j++)
420  A(0, j) = A(0, j - 1) * dt;
421  SrifMU<double>(R, Z, A, 1);
422  }
423 #else
424  // compute partials transpose multiplied by 'weight
425  // matrix'=diag(squared wts)
426  PT = transpose(P);
427  for (i = 0; i < N; i++)
428  {
429  for (j = 0; j < nd; j++)
430  {
431  PT(i, j) *= Wts(j) * Wts(j);
432  }
433  }
434  Cov = PT * P; // information matrix
435 #endif
436 
437  // solve
438  try
439  {
440 #ifdef SEQUENTIAL
441  invR = inverseUT(R, &mad, &median);
442  Cov = UTtimesTranspose(invR);
443  Coeff = invR * Z;
444 #else
445  Cov = inverse(Cov);
446  Coeff = Cov * PT * D;
447 #endif
448  }
449  catch (Exception& e)
450  {
451  return -1;
452  }
453 
454  // compute residuals
455 #ifdef SEQUENTIAL
456  // loop over the data
457  for (i = 0; i < nd; i++)
458  {
459  fit = Coeff(N - 1); // fit = partials * coeff
460  dt = td[i] - t0; // delta time
461  for (j = N - 2; j >= 0; j--)
462  {
463  fit = fit * dt + Coeff(j);
464  }
465  ResCopy(i) = Res(i) = xd[i] - x0 - fit; // data - fit
466  }
467 #else
468  ResCopy = Res = D - P * Coeff;
469 #endif
470 
471  // compute median and MAD. NB Median() will sort the vector...
472  mad = MedianAbsoluteDeviation(&(ResCopy[0]), ResCopy.size(), median);
473 
474  // recompute weights
475  Vector<double> OldWts(Wts);
476  for (i = 0; i < nd; i++)
477  {
478  if (Res(i) < -RobustTuningT * mad)
479  {
480  Wts(i) = -RobustTuningT * mad / Res(i);
481  }
482  else if (Res(i) > RobustTuningT * mad)
483  {
484  Wts(i) = RobustTuningT * mad / Res(i);
485  }
486  else
487  {
488  Wts(i) = 1.0;
489  }
490  }
491 
492  // test for convergence
493  niter++;
494  conv = RMS(OldWts - Wts);
495  if (niter > maxiter)
496  {
497  break; // return -2;
498  }
499  if (conv > 1.)
500  {
501  break; // return -3;
502  }
503  if (niter > 2 && conv < conv_limit)
504  {
505  break;
506  }
507  }
508 
509  // copy out weights, residuals and solution
510  for (i = 0; i < N; i++)
511  {
512  c[i] = Coeff(i);
513  }
514 
515  // c[0] += x0;
516  for (i = 0; i < nd; i++)
517  {
518  xd[i] = Res(i);
519  if (w)
520  {
521  w[i] = Wts(i);
522  }
523  }
524  if (niter > maxiter)
525  {
526  return -2;
527  }
528  if (conv > 1.)
529  {
530  return -3;
531  }
532 
533 #undef SEQUENTIAL
534  return 0;
535  }
536  catch (Exception& e)
537  {
538  GNSSTK_RETHROW(e);
539  }
540  catch (std::exception& e)
541  {
542  Exception E("std except: " + string(e.what()));
543  GNSSTK_THROW(E);
544  }
545  catch (...)
546  {
547  Exception e("Unknown exception");
548  GNSSTK_THROW(e);
549  }
550 } // end RobustPolyFit
551 
552  /* Anderson-Darling test statistic, which is a variant of the
553  Kolmogorov-Smirnoff test, comparing the distribution of data with mean m and
554  standard deviation s to the normal distribution.
555  @param xd array of data.
556  @param nd length of array xd.
557  @param mean mean of the data.
558  @param stddev standard deviation of the data.
559  @param save_flag if true (default) array xd will NOT be changed, otherwise
560  it will be sorted. */
561 double gnsstk::ADtest(double *xd, const int nd, double mean, double stddev,
562  bool save_flag)
563 {
564  if (!xd || nd < 2)
565  {
566  Exception e("Invalid input");
567  GNSSTK_THROW(e);
568  }
569 
570  try
571  {
572  int i;
573  double med, *save;
574 
575  if (save_flag)
576  {
577  save = new double[nd];
578  if (!save)
579  {
580  Exception e("Could not allocate temporary array");
581  GNSSTK_THROW(e);
582  }
583  for (i = 0; i < nd; i++)
584  {
585  save[i] = xd[i];
586  }
587  }
588  QSort(xd, nd);
589 
590  double tn = double(nd);
591  double AD = -tn, cdf;
592  for (i = 0; i < nd; i++)
593  {
594  cdf = normalCDF<double>(mean, stddev, xd[i]);
595  // cout << "CDF " << setw(4) << i
596  // << " " << fixed << setprecision(6) << setw(11) << xd[i]
597  // << " " << setw(11) << cdf << endl;
598  AD -= ((2.0 * double(i) - 1.0) * ::log(cdf) +
599  (2.0 * (tn - double(i)) + 1.0) * ::log(1.0 - cdf)) /
600  tn;
601  }
602 
603  AD *= 1.0 + (0.75 + 2.25 / tn) / tn;
604 
605  if (save_flag)
606  {
607  for (i = 0; i < nd; i++)
608  xd[i] = save[i];
609  delete[] save;
610  }
611 
612  return AD;
613  }
614  catch (Exception& e)
615  {
616  GNSSTK_RETHROW(e);
617  }
618  catch (std::exception& e)
619  {
620  Exception E("std except: " + string(e.what()));
621  GNSSTK_THROW(E);
622  }
623  catch (...)
624  {
625  Exception e("Unknown exception");
626  GNSSTK_THROW(e);
627  }
628 }
629 
630 //------------------------------------------------------------------------------------
631 //------------------------------------------------------------------------------------
gnsstk::UTtimesTranspose
Matrix< T > UTtimesTranspose(const Matrix< T > &UT)
Definition: SRIMatrix.hpp:725
gnsstk::ADtest
double ADtest(double *xd, const int nd, double m, double s, bool save_flag=true)
Definition: RobustStats.cpp:561
gnsstk::Robust::Quantiles
void Quantiles(double *xd, long nd)
Definition: RobustStats.cpp:350
gnsstk::Robust::Median
T Median(T *xd, const int nd, bool save_flag=true)
Definition: RobustStats.hpp:361
StringUtils.hpp
gnsstk::inverseUT
Matrix< T > inverseUT(const Matrix< T > &UT, T *ptrSmall=NULL, T *ptrBig=NULL)
Definition: SRIMatrix.hpp:636
SRI.hpp
gnsstk::Robust::Quartiles
void Quartiles(const T *xd, const int nd, T &Q1, T &Q3)
Definition: RobustStats.hpp:425
gnsstk
For Sinex::InputHistory.
Definition: BasicFramework.cpp:50
gnsstk::Exception
Definition: Exception.hpp:151
gnsstk::transpose
SparseMatrix< T > transpose(const SparseMatrix< T > &M)
transpose
Definition: SparseMatrix.hpp:829
gnsstk::Matrix< double >
nd
int nd
Definition: IERS1996NutationData.hpp:44
log
#define log
Definition: DiscCorr.cpp:625
GNSSTK_RETHROW
#define GNSSTK_RETHROW(exc)
Definition: Exception.hpp:369
example4.pos
pos
Definition: example4.py:125
Stem
long Stem(double x, double &scale)
Definition: RobustStats.cpp:67
gnsstk::Vector< double >
gnsstk::TrackingCode::P
@ P
Legacy GPS precise code.
RobustTuningT
#define RobustTuningT
tuning constant used in M-estimate and Robust least squares (SRIFilter.cpp)
Definition: RobustStats.hpp:63
RobustStats.hpp
gnsstk::Vector::size
size_t size() const
STL size.
Definition: Vector.hpp:207
Exception.hpp
std
Definition: Angle.hpp:142
gnsstk::QSort
void QSort(T *sa, int na, int(*comp)(const T &, const T &)=gnsstk::Qsort_compare)
Definition: RobustStats.hpp:139
gnsstk::range
double range(const Position &A, const Position &B)
Definition: Position.cpp:1273
GNSSTK_THROW
#define GNSSTK_THROW(exc)
Definition: Exception.hpp:366
Matrix.hpp
gnsstk::Robust::RobustPolyFit
int RobustPolyFit(double *xd, const double *td, int nd, int n, double *c, double *w=NULL)
Definition: RobustStats.cpp:368
gnsstk::inverse
SparseMatrix< T > inverse(const SparseMatrix< T > &A)
Definition: SparseMatrix.hpp:1890
gnsstk::median
T median(const Vector< T > &v)
Compute the median of a gnsstk::Vector.
Definition: Stats.hpp:59
gnsstk::RMS
T RMS(const ConstVectorBase< T, BaseClass > &l)
Definition: VectorOperators.hpp:197
gnsstk::mad
T mad(const gnsstk::Vector< T > &v)
median absolute deviation of a gnsstk::Vector
Definition: Stats.hpp:85
gnsstk::Robust::StemLeafPlot
void StemLeafPlot(std::ostream &os, double *xd, long nd, const std::string &msg=std::string(""))
Definition: RobustStats.cpp:70
gnsstk::Robust::MedianAbsoluteDeviation
T MedianAbsoluteDeviation(T *xd, int nd, T &M, bool save_flag=true)
Definition: RobustStats.hpp:467


gnsstk
Author(s):
autogenerated on Wed Oct 25 2023 02:40:41