FloatingPointComparision.hpp
Go to the documentation of this file.
1 // Copyright 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
31 //
32 // The Google C++ Testing Framework (Google Test)
33 
34 
35 
36 // This template class represents an IEEE floating-point number
37 // (either single-precision or double-precision, depending on the
38 // template parameters).
39 //
40 // The purpose of this class is to do more sophisticated number
41 // comparison. (Due to round-off error, etc., it's very unlikely that
42 // two floating-point numbers will be exactly equal. Hence a naive
43 // comparison by the == operation often doesn't work.)
44 //
45 // Format of IEEE floating-point:
46 //
47 // The most-significant bit being the leftmost, an IEEE
48 // floating-point looks like
49 //
50 // sign_bit exponent_bits fraction_bits
51 //
52 // Here, sign_bit is a single bit that designates the sign of the
53 // number.
54 //
55 // For float, there are 8 exponent bits and 23 fraction bits.
56 //
57 // For double, there are 11 exponent bits and 52 fraction bits.
58 //
59 // More details can be found at
60 // http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
61 //
62 // Template parameter:
63 //
64 // RawType: the raw floating-point type (either float or double)
65 
66 #ifndef ApproxMVBB_Common_FloatingPointComparision_hpp
67 #define ApproxMVBB_Common_FloatingPointComparision_hpp
68 
69 template <size_t size> // Maps a size in bytes to integer types of that size.
70 class TypeWithSize {
71  public:
72  // Fallback for any size without an explicit specialization: UInt is
73  // void, which keeps TypeWithSize<N> from being used as an integer type.
74  typedef void UInt;
75 };
76 
77 // The specialization for size 4.
78 template <>
79 class TypeWithSize<4> {
80  public:
81  // int and unsigned int have size 4 in both gcc and MSVC.
82  //
83  // As base/basictypes.h doesn't compile on Windows, we cannot use
84  // uint32, uint64, etc. here.
85  typedef int Int; // signed integer type for size 4
86  typedef unsigned int UInt; // unsigned integer type for size 4
87 };
88 
89 // The specialization for size 8.
90 template <>
91 class TypeWithSize<8> {
92  public:
93 #if GTEST_OS_WINDOWS // NOTE(review): no definition of this macro appears in this listing; if undefined, #if treats it as 0 and the #else branch is used
94  typedef __int64 Int; // signed integer type for size 8 (Windows branch)
95  typedef unsigned __int64 UInt; // unsigned integer type for size 8 (Windows branch)
96 #else
97  typedef long long Int; // NOLINT
98  typedef unsigned long long UInt; // NOLINT
99 #endif // GTEST_OS_WINDOWS
100 };
101 
102 
103 
104 template <typename RawType>
106  public:
107  // Defines the unsigned integer type that has the same size as the
108  // floating point number.
110 
111  // Constants.
112 
113  // # of bits in a number.
114  static const size_t kBitCount = 8*sizeof(RawType); // assumes 8-bit bytes
115 
116  // # of fraction bits in a number (digits also counts the implicit leading bit).
117  static const size_t kFractionBitCount =
118  std::numeric_limits<RawType>::digits - 1;
119 
120  // # of exponent bits in a number: what remains after the sign bit and fraction.
121  static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
122 
123  // The mask for the sign bit (the most significant bit).
124  static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
125 
126  // The mask for the fraction bits (the low kFractionBitCount bits).
127  static const Bits kFractionBitMask =
128  ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
129 
130  // The mask for the exponent bits (every bit that is neither sign nor fraction).
131  static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
132 
133  // How many ULP's (Units in the Last Place) we want to tolerate when
134  // comparing two numbers. The larger the value, the more error we
135  // allow. A 0 value means that two numbers must be exactly the same
136  // to be considered equal.
137  //
138  // The maximum error of a single floating-point operation is 0.5
139  // units in the last place. On Intel CPU's, all floating-point
140  // calculations are done with 80-bit precision, while double has 64
141  // bits. Therefore, 4 should be enough for ordinary use.
142  //
143  // See the following article for more details on ULP:
144  // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
145  static const size_t kMaxUlps = 4; // upper bound on the bit distance accepted by AlmostEquals()
146 
147  // Constructs a FloatingPoint from a raw floating-point number.
148  //
149  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
150  // around may change its bits, although the new value is guaranteed
151  // to be also a NAN. Therefore, don't expect this constructor to
152  // preserve the bits in x when x is a NAN.
153  explicit FloatingPoint(const RawType& x) { u_.value_ = x; } // stores x in the value_ member of u_
154 
155  // Static methods
156 
157  // Reinterprets a bit pattern as a floating-point number.
158  //
159  // This function is needed to test the AlmostEquals() method.
160  static RawType ReinterpretBits(const Bits bits) {
161  FloatingPoint fp(0); // placeholder value; its bits are overwritten on the next line
162  fp.u_.bits_ = bits; // write the pattern through the integer member of u_
163  return fp.u_.value_; // read it back through the RawType member of u_
164  }
165 
166  // Returns the floating-point number that represents positive infinity.
167  static RawType Infinity() {
168  return ReinterpretBits(kExponentBitMask); // exponent bits all ones, sign and fraction bits zero
169  }
170 
171  // Returns the maximum representable finite floating-point number.
172  static RawType Max(); // declaration only; NOTE(review): no definition appears in this listing
173 
174  // Non-static methods
175 
176  // Returns the bits that represent this number.
177  const Bits &bits() const { return u_.bits_; } // whole bit pattern, unmasked, by const reference
178 
179  // Returns the exponent bits of this number.
180  Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } // masked in place, not shifted down
181 
182  // Returns the fraction bits of this number.
183  Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } // the fraction occupies the low-order bits, so no shift is involved
184 
185  // Returns the sign bit of this number.
186  Bits sign_bit() const { return kSignBitMask & u_.bits_; } // kSignBitMask when the sign bit is set, otherwise 0
187 
188  // Returns true iff this is NAN (not a number).
189  bool is_nan() const {
190  // It's a NAN if the exponent bits are all ones and the fraction bits are
191  // not entirely zeros; with a zero fraction the value is an infinity instead.
192  return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
193  }
194 
195  // Returns true iff this number is at most kMaxUlps ULP's away from
196  // rhs. In particular, this function:
197  //
198  // - returns false if either number is (or both are) NAN.
199  // - treats really large numbers as almost equal to infinity.
200  // - thinks +0.0 and -0.0 are 0 ULP's apart.
201  bool AlmostEquals(const FloatingPoint& rhs) const {
202  // The IEEE standard says that any comparison operation involving
203  // a NAN must return false.
204  if (is_nan() || rhs.is_nan()) return false;
205 
206  return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) // integer distance between the two bit patterns
207  <= kMaxUlps; // infinities get no special case; they go through the same distance check
208  }
209 
210  private:
211  // The data type used to store the actual floating-point number.
213  RawType value_; // The raw floating-point number.
214  Bits bits_; // The bits that represent the number.
215  };
216 
217  // Converts an integer from the sign-and-magnitude representation to
218  // the biased representation. More precisely, let N be 2 to the
219  // power of (kBitCount - 1), an integer x is represented by the
220  // unsigned number x + N.
221  //
222  // For instance,
223  //
224  // -N + 1 (the most negative number representable using
225  // sign-and-magnitude) is represented by 1;
226  // 0 is represented by N; and
227  // N - 1 (the biggest number representable using
228  // sign-and-magnitude) is represented by 2N - 1.
229  //
230  // Read http://en.wikipedia.org/wiki/Signed_number_representations
231  // for more details on signed number representations.
232  static Bits SignAndMagnitudeToBiased(const Bits &sam) {
233  if (kSignBitMask & sam) {
234  // sam represents a negative number: sign bit (N) plus magnitude m.
235  return ~sam + 1; // unsigned negation of N + m, which yields N - m
236  } else {
237  // sam represents a positive number (or +0).
238  return kSignBitMask | sam; // sets the top bit, giving N + sam
239  }
240  }
241 
242  // Given two numbers in the sign-and-magnitude representation,
243  // returns the distance between them as an unsigned number.
244  static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
245  const Bits &sam2) {
246  const Bits biased1 = SignAndMagnitudeToBiased(sam1); // biased form: x is stored as x + N, so ordering is preserved
247  const Bits biased2 = SignAndMagnitudeToBiased(sam2);
248  return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1); // larger minus smaller, so the unsigned subtraction cannot wrap
249  }
250 
252 };
253 
254 #endif
static Bits SignAndMagnitudeToBiased(const Bits &sam)
FloatingPointUnion u_
static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, const Bits &sam2)
static RawType ReinterpretBits(const Bits bits)
bool AlmostEquals(const FloatingPoint &rhs) const
TypeWithSize< sizeof(RawType)>::UInt Bits
const Bits & bits() const
FloatingPoint(const RawType &x)
static RawType Infinity()


asr_approx_mvbb
Author(s): Gassner Nikolai
autogenerated on Mon Jun 10 2019 12:38:08