df_diff.cpp
Go to the documentation of this file.
1 //==============================================================================
2 //
3 // This file is part of GNSSTk, the ARL:UT GNSS Toolkit.
4 //
5 // The GNSSTk is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published
7 // by the Free Software Foundation; either version 3.0 of the License, or
8 // any later version.
9 //
10 // The GNSSTk is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with GNSSTk; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 // This software was developed by Applied Research Laboratories at the
20 // University of Texas at Austin.
21 // Copyright 2004-2022, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24 
25 //==============================================================================
26 //
27 // This software was developed by Applied Research Laboratories at the
28 // University of Texas at Austin, under contract to an agency or agencies
29 // within the U.S. Department of Defense. The U.S. Government retains all
30 // rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 // Pursuant to DoD Directive 523024
33 //
34 // DISTRIBUTION STATEMENT A: This software has been approved for public
35 // release, distribution is unlimited.
36 //
37 //==============================================================================
38 
39 // This program is intended to help difference data files without
40 // failing on small differences in floating point values.
41 
42 
43 #include <iostream>
44 #include <fstream>
45 #include <string>
46 #include <cmath>
47 #include <set>
48 
49 #include "BasicFramework.hpp"
50 #include "StringUtils.hpp"
51 
52 using namespace std;
53 
55 {
56 public:
57  DFDiff(const string& applName)
58  noexcept
60  applName,
61  "Differences two input files while allowing small differences"
62  " in floating point values."),
63  epsilon(1e-5),
64  linesToSkip(0),
65  lastlineValue(0),
66  totalLines(0)
67  {};
68 
69 
/// Structure that defines column and regular expression to ignore.
struct RegExcludeCol
{
    /// Zero-based index of the whitespace-separated column to ignore.
    unsigned col = 0;
    /// Regular expression a line must match for the column to be ignored.
    std::string regex;
};
76 
77 // While this is in C++11, we also want to work under C++03
/**
 * Convert the leading part of a string to a double using strtod().
 *
 * @param[in] s The text to convert.
 * @param[out] isDouble Set to true when strtod() consumed at least one
 *   character of s, false when nothing could be converted. Trailing
 *   characters after a valid numeric prefix do not clear the flag.
 * @return The value produced by strtod() (0 when nothing was converted).
 */
double stringToDouble(const std::string& s, bool& isDouble)
{
    const char* const start = s.c_str();
    char* stop = nullptr;
    const double parsed = strtod(start, &stop);
    // strtod leaves the end pointer at the start when no conversion happened.
    isDouble = (stop != start);
    return parsed;
}
88 
89  bool initialize(int argc, char *argv[]) noexcept
90  {
92  input1Option('1', "input1", "First file to take the input from.", true),
93  input2Option('2', "input2", "Second file to take the input from.", true),
94  lineSkipOption('l', "lines", "Number of lines to skip at beginning of file."),
95  epsilonOption('e', "epsilon", "Percent allowable difference in floating point values."),
96  outputOption('o', "output", "A file to receive the output. The default is stdout."),
97  regexOption('X', "regexclude", "Exclude lines matching a regular"
98  " expression"),
99  igregOption('I', "ign-reg", "Ignore column X (starting with 0) on"
100  " lines matching regular expression Y, ARG=X,Y"),
101  lastLineOption('z', "last", "ignore the last X lines of the file");
102 
103  if (!BasicFramework::initialize(argc,argv))
104  return false;
105 
106  input1Fn = input1Option.getValue()[0];
107  input2Fn = input2Option.getValue()[0];
108 
109  input1.open(input1Fn.c_str(), istringstream::in);
110  input2.open(input2Fn.c_str(), istringstream::in);
111 
112  if (!input1)
113  {
114  cerr << "Could not open: " << input1Fn << endl;
115  exitCode=1;
116  return false;
117  }
118 
119  if (!input1)
120  {
121  cerr << "Could not open: " << input2Fn << endl;
122  exitCode=1;
123  return false;
124  }
125 
126  // Determine total number of lines in input file 1
127  string line;
128  while (getline(input1, line))
129  totalLines++;
130 
131  if (debugLevel)
132  cout << "File 1 has " << totalLines << " lines" << endl;
133 
134  // Determine how many lines to ignore at the end of the file
135  if (lastLineOption.getCount())
136  lastlineValue = gnsstk::StringUtils::asInt(lastLineOption.getValue()[0]);
137 
138  totalLines = totalLines - lastlineValue;
139 
140  // clear and reset the buffer for input file 1
141  input1.clear();
142  input1.seekg(0,ios::beg);
143 
144  if (outputOption.getCount())
145  outputFn = outputOption.getValue()[0];
146 
147  if (outputFn=="-" || outputFn=="")
148  {
149  output.copyfmt(cout);
150  output.clear(cout.rdstate());
151  output.ios::rdbuf(cout.rdbuf());
152  outputFn = "<stdout>";
153  }
154  else
155  {
156  output.open(outputFn.c_str(), ios::out);
157  }
158 
159  if (!output)
160  {
161  cerr << "Could not open: " << outputFn << endl;
162  exitCode=1;
163  return false;
164  }
165 
166  if (epsilonOption.getCount())
167  epsilon = gnsstk::StringUtils::asDouble(epsilonOption.getValue()[0]);
168 
169  if (lineSkipOption.getCount())
170  linesToSkip = gnsstk::StringUtils::asInt(lineSkipOption.getValue()[0]);
171 
172  if (regexOption.getCount())
173  regexclude = regexOption.getValue();
174 
175  if (igregOption.getCount())
176  {
177  vector<string> igvec(igregOption.getValue());
178  for (unsigned i = 0; i < igvec.size(); i++)
179  {
180  if (gnsstk::StringUtils::numWords(igvec[i],',') < 2)
181  {
182  cerr << "Invalid spec \"" << igvec[i]
183  << "\", expecting column,regex" << endl;
185  return false;
186  }
187  string colStr = gnsstk::StringUtils::firstWord(igvec[i],',');
189  {
190  cerr << "Invalid spec \"" << igvec[i]
191  << "\", expecting column,regex" << endl;
193  return false;
194  }
195  RegExcludeCol rec;
196  rec.col = gnsstk::StringUtils::asUnsigned(colStr);
197  rec.regex = gnsstk::StringUtils::words(igvec[i],1,string::npos,',');
198  recs.push_back(rec);
199  }
200  }
201 
202  if (debugLevel)
203  output << "First file " << input1Fn << endl
204  << "Second file " << input2Fn << endl
205  << "Output file " << outputFn << endl
206  << "Epsilon " << epsilon << endl
207  << "Skipping " << linesToSkip << " lines at beginning" << endl
208  << "Skipping " << lastlineValue << " lines at end" << endl;
209 
210  return true;
211  }
212 
213 protected:
214  virtual void spinUp()
215  {}
216 
217  virtual void process()
218  {
219  try
220  {
221  for (long lineNumber = 1; lineNumber < totalLines; lineNumber++)
222  {
223  string line1, line2;
224  if (!getline(input1, line1) || !getline(input2, line2))
225  {
226  exitCode++;
227  break;
228  }
229 
230  if (lineNumber <= linesToSkip)
231  continue;
232 
233  // Try to match BOTH lines with each user-specified
234  // regular expression. If they both match, skip the
235  // line. If only one matches, that counts as a
236  // difference.
237  bool skipregex = false;
238  for (unsigned i = 0; i < regexclude.size(); i++)
239  {
240  if (gnsstk::StringUtils::isLike(line1, regexclude[i]) &&
241  gnsstk::StringUtils::isLike(line2, regexclude[i]))
242  {
243  skipregex = true;
244  break;
245  }
246  }
247  if (skipregex)
248  continue;
249 
250  string s1, s2;
251  istringstream ss1(line1);
252  istringstream ss2(line2);
253  bool lineDiff = false;
254  unsigned column = 0;
255  set<unsigned> skipCols;
256  // collect the columns to ignore due to matching regular expr.
257  for (unsigned i = 0; i < recs.size(); i++)
258  {
259  if (gnsstk::StringUtils::isLike(line1, recs[i].regex) &&
260  gnsstk::StringUtils::isLike(line2, recs[i].regex))
261  {
262  skipCols.insert(recs[i].col);
263  }
264  }
265  while ((ss1 >> s1) && (ss2 >> s2))
266  {
267  if (skipCols.count(column++))
268  {
269  continue;
270  }
271  if (s1 != s2)
272  {
273  bool df1,df2;
274  double d1 = stringToDouble(s1, df1);
275  double d2 = stringToDouble(s2, df2);
276  if (df1 && df2)
277  {
278  double diff = d1-d2;
279  double err = d2;
280  if (d1 != 0)
281  err = diff/d1;
282 
283  if (abs(err) > epsilon)
284  {
285  exitCode += 1;
286  lineDiff = true;
287  }
288  }
289  else
290  {
291  exitCode += 1;
292  lineDiff = true;
293  }
294 
295  if (verboseLevel && lineDiff)
296  {
297  output << "f1, " << lineNumber << ":" << line1 << endl
298  << "f2, " << lineNumber << ":" << line2 << endl;
299  }
300  }
301  }
302  }
303  }
304  catch (std::exception& e)
305  {
306  cout << e.what() << endl;
307  exitCode += 1;
308  }
309 
310  if (verboseLevel)
311  output << "Total differences: " << exitCode << endl;
312 
313  }
314 
315  virtual void shutDown()
316  {}
317 
318  string input1Fn, input2Fn, outputFn;
319  ofstream output;
320  ifstream input1, input2;
321  double epsilon;
322  vector<string> regexclude;
323  vector<RegExcludeCol> recs;
324 public:
328 };
329 
330 
331 int main(int argc, char *argv[])
332 {
333  DFDiff crap(argv[0]);
334 
335  if (crap.initialize(argc, argv))
336  crap.run();
337 
338  return crap.exitCode;
339 }
340 
gnsstk::StringUtils::asInt
long asInt(const std::string &s)
Definition: StringUtils.hpp:713
StringUtils.hpp
DFDiff::lastlineValue
int lastlineValue
Definition: df_diff.cpp:327
DFDiff::RegExcludeCol::col
unsigned col
Definition: df_diff.cpp:73
gnsstk::CommandOption::getCount
virtual unsigned long getCount() const
Definition: CommandOption.hpp:188
DFDiff::spinUp
virtual void spinUp()
Definition: df_diff.cpp:214
gnsstk::CommandOptionWithAnyArg
Definition: CommandOption.hpp:342
DFDiff::outputFn
string outputFn
Definition: df_diff.cpp:318
DFDiff::initialize
bool initialize(int argc, char *argv[]) noexcept
Definition: df_diff.cpp:89
gnsstk::StringUtils::words
std::string words(const std::string &s, const std::string::size_type firstWord=0, const std::string::size_type numWords=std::string::npos, const char delimiter=' ')
Definition: StringUtils.hpp:2199
DFDiff::DFDiff
DFDiff(const string &applName) noexcept
Definition: df_diff.cpp:57
initialize
int initialize(string &errors)
Definition: RinEdit.cpp:513
gnsstk::StringUtils::numWords
int numWords(const std::string &s, const char delimiter=' ')
Definition: StringUtils.hpp:2171
gnsstk::BasicFramework
Definition: BasicFramework.hpp:387
DFDiff::totalLines
long totalLines
Definition: df_diff.cpp:326
main
int main(int argc, char *argv[])
Definition: df_diff.cpp:331
DFDiff::process
virtual void process()
Definition: df_diff.cpp:217
DFDiff::output
ofstream output
Definition: df_diff.cpp:319
gnsstk::BasicFramework::OPTION_ERROR
static const int OPTION_ERROR
Definition: BasicFramework.hpp:399
DFDiff::recs
vector< RegExcludeCol > recs
Definition: df_diff.cpp:323
example4.err
err
Definition: example4.py:126
DFDiff::regexclude
vector< string > regexclude
Definition: df_diff.cpp:322
DFDiff::epsilon
double epsilon
Definition: df_diff.cpp:321
gnsstk::StringUtils::asDouble
double asDouble(const std::string &s)
Definition: StringUtils.hpp:705
gnsstk::BasicFramework::run
bool run() noexcept
Definition: BasicFramework.cpp:126
DFDiff
Definition: df_diff.cpp:54
gnsstk::StringUtils::isLike
bool isLike(const std::string &s, const std::string &aPattern, const char zeroOrMore=' *', const char oneOrMore='+', const char anyChar='.')
Definition: StringUtils.hpp:948
gnsstk::StringUtils::asUnsigned
unsigned long asUnsigned(const std::string &s)
Definition: StringUtils.hpp:721
BasicFramework.hpp
std
Definition: Angle.hpp:142
gnsstk::BasicFramework::exitCode
int exitCode
Definition: BasicFramework.hpp:450
gnsstk::CommandOption::getValue
const std::vector< std::string > & getValue() const
Definition: CommandOption.hpp:194
gnsstk::StringUtils::firstWord
std::string firstWord(const std::string &s, const char delimiter=' ')
Definition: StringUtils.hpp:2138
DFDiff::input2
ifstream input2
Definition: df_diff.cpp:320
DFDiff::stringToDouble
double stringToDouble(const string &s, bool &isDouble)
Definition: df_diff.cpp:78
DFDiff::linesToSkip
long linesToSkip
Definition: df_diff.cpp:325
gnsstk::StringUtils::isDigitString
bool isDigitString(const std::string &s)
Definition: StringUtils.hpp:1871
DFDiff::shutDown
virtual void shutDown()
Definition: df_diff.cpp:315
DFDiff::RegExcludeCol::regex
string regex
Definition: df_diff.cpp:74
DFDiff::RegExcludeCol
Structure that defines column and regular expression to ignore.
Definition: df_diff.cpp:71


gnsstk
Author(s):
autogenerated on Wed Oct 25 2023 02:40:38