asr_approx_mvbb: FloatingPointComparision.hpp Source File

Go to the documentation of this file.
00001 // Copyright 2005, Google Inc.
00002 // All rights reserved.
00003 //
00004 // Redistribution and use in source and binary forms, with or without
00005 // modification, are permitted provided that the following conditions are
00006 // met:
00007 //
00008 //     * Redistributions of source code must retain the above copyright
00009 // notice, this list of conditions and the following disclaimer.
00010 //     * Redistributions in binary form must reproduce the above
00011 // copyright notice, this list of conditions and the following disclaimer
00012 // in the documentation and/or other materials provided with the
00013 // distribution.
00014 //     * Neither the name of Google Inc. nor the names of its
00015 // contributors may be used to endorse or promote products derived from
00016 // this software without specific prior written permission.
00017 //
00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 //
00030 // Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
00031 //
00032 // The Google C++ Testing Framework (Google Test)
00033 
00034 
00035 
00036 // This template class represents an IEEE floating-point number
00037 // (either single-precision or double-precision, depending on the
00038 // template parameters).
00039 //
00040 // The purpose of this class is to do more sophisticated number
00041 // comparison.  (Due to round-off error, etc, it's very unlikely that
00042 // two floating-points will be equal exactly.  Hence a naive
00043 // comparison by the == operation often doesn't work.)
00044 //
00045 // Format of IEEE floating-point:
00046 //
00047 //   The most-significant bit being the leftmost, an IEEE
00048 //   floating-point looks like
00049 //
00050 //     sign_bit exponent_bits fraction_bits
00051 //
00052 //   Here, sign_bit is a single bit that designates the sign of the
00053 //   number.
00054 //
00055 //   For float, there are 8 exponent bits and 23 fraction bits.
00056 //
00057 //   For double, there are 11 exponent bits and 52 fraction bits.
00058 //
00059 //   More details can be found at
00060 //   http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
00061 //
00062 // Template parameter:
00063 //
00064 //   RawType: the raw floating-point type (either float or double)
00065 
00066 #ifndef ApproxMVBB_Common_FloatingPointComparision_hpp
00067 #define ApproxMVBB_Common_FloatingPointComparision_hpp

#include <cstddef>  // size_t
#include <limits>   // std::numeric_limits

// Maps a size in bytes to integer types of exactly that size.
//
// This primary template is selected for every size that has no explicit
// specialization below.  It exposes UInt as void on purpose: any attempt to
// declare a variable of type TypeWithSize<N>::UInt for an unsupported N
// fails to compile, which prevents the user from using TypeWithSize<N> with
// incorrect values of N.
template <size_t size>
class TypeWithSize {
 public:
  typedef void UInt;
};

// The specialization for size 4.
//
// int and unsigned int are used here on the expectation that they occupy
// 4 bytes (true for gcc and MSVC on the usual targets).
template <>
class TypeWithSize<4> {
 public:
  typedef int Int;
  typedef unsigned int UInt;
};

// The specialization for size 8.
//
// long long is used unconditionally.  No platform switch is needed: on MSVC
// __int64 names the same type as long long, so there is no reason to depend
// on a configuration macro owned by another library.
template <>
class TypeWithSize<8> {
 public:
  typedef long long Int;            // NOLINT
  typedef unsigned long long UInt;  // NOLINT
};
00101 
00102 
00103 
00104 template <typename RawType>
00105 class FloatingPoint {
00106  public:
00107   // Defines the unsigned integer type that has the same size as the
00108   // floating point number.
00109   typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
00110 
00111   // Constants.
00112 
00113   // # of bits in a number.
00114   static const size_t kBitCount = 8*sizeof(RawType);
00115 
00116   // # of fraction bits in a number.
00117   static const size_t kFractionBitCount =
00118     std::numeric_limits<RawType>::digits - 1;
00119 
00120   // # of exponent bits in a number.
00121   static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
00122 
00123   // The mask for the sign bit.
00124   static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
00125 
00126   // The mask for the fraction bits.
00127   static const Bits kFractionBitMask =
00128     ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
00129 
00130   // The mask for the exponent bits.
00131   static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
00132 
00133   // How many ULP's (Units in the Last Place) we want to tolerate when
00134   // comparing two numbers.  The larger the value, the more error we
00135   // allow.  A 0 value means that two numbers must be exactly the same
00136   // to be considered equal.
00137   //
00138   // The maximum error of a single floating-point operation is 0.5
00139   // units in the last place.  On Intel CPU's, all floating-point
00140   // calculations are done with 80-bit precision, while double has 64
00141   // bits.  Therefore, 4 should be enough for ordinary use.
00142   //
00143   // See the following article for more details on ULP:
00144   // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
00145   static const size_t kMaxUlps = 4;
00146 
00147   // Constructs a FloatingPoint from a raw floating-point number.
00148   //
00149   // On an Intel CPU, passing a non-normalized NAN (Not a Number)
00150   // around may change its bits, although the new value is guaranteed
00151   // to be also a NAN.  Therefore, don't expect this constructor to
00152   // preserve the bits in x when x is a NAN.
00153   explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
00154 
00155   // Static methods
00156 
00157   // Reinterprets a bit pattern as a floating-point number.
00158   //
00159   // This function is needed to test the AlmostEquals() method.
00160   static RawType ReinterpretBits(const Bits bits) {
00161     FloatingPoint fp(0);
00162     fp.u_.bits_ = bits;
00163     return fp.u_.value_;
00164   }
00165 
00166   // Returns the floating-point number that represent positive infinity.
00167   static RawType Infinity() {
00168     return ReinterpretBits(kExponentBitMask);
00169   }
00170 
00171   // Returns the maximum representable finite floating-point number.
00172   static RawType Max();
00173 
00174   // Non-static methods
00175 
00176   // Returns the bits that represents this number.
00177   const Bits &bits() const { return u_.bits_; }
00178 
00179   // Returns the exponent bits of this number.
00180   Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
00181 
00182   // Returns the fraction bits of this number.
00183   Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
00184 
00185   // Returns the sign bit of this number.
00186   Bits sign_bit() const { return kSignBitMask & u_.bits_; }
00187 
00188   // Returns true iff this is NAN (not a number).
00189   bool is_nan() const {
00190     // It's a NAN if the exponent bits are all ones and the fraction
00191     // bits are not entirely zeros.
00192     return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
00193   }
00194 
00195   // Returns true iff this number is at most kMaxUlps ULP's away from
00196   // rhs.  In particular, this function:
00197   //
00198   //   - returns false if either number is (or both are) NAN.
00199   //   - treats really large numbers as almost equal to infinity.
00200   //   - thinks +0.0 and -0.0 are 0 DLP's apart.
00201   bool AlmostEquals(const FloatingPoint& rhs) const {
00202     // The IEEE standard says that any comparison operation involving
00203     // a NAN must return false.
00204     if (is_nan() || rhs.is_nan()) return false;
00205 
00206     return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
00207         <= kMaxUlps;
00208   }
00209 
00210  private:
00211   // The data type used to store the actual floating-point number.
00212   union FloatingPointUnion {
00213     RawType value_;  // The raw floating-point number.
00214     Bits bits_;      // The bits that represent the number.
00215   };
00216 
00217   // Converts an integer from the sign-and-magnitude representation to
00218   // the biased representation.  More precisely, let N be 2 to the
00219   // power of (kBitCount - 1), an integer x is represented by the
00220   // unsigned number x + N.
00221   //
00222   // For instance,
00223   //
00224   //   -N + 1 (the most negative number representable using
00225   //          sign-and-magnitude) is represented by 1;
00226   //   0      is represented by N; and
00227   //   N - 1  (the biggest number representable using
00228   //          sign-and-magnitude) is represented by 2N - 1.
00229   //
00230   // Read http://en.wikipedia.org/wiki/Signed_number_representations
00231   // for more details on signed number representations.
00232   static Bits SignAndMagnitudeToBiased(const Bits &sam) {
00233     if (kSignBitMask & sam) {
00234       // sam represents a negative number.
00235       return ~sam + 1;
00236     } else {
00237       // sam represents a positive number.
00238       return kSignBitMask | sam;
00239     }
00240   }
00241 
00242   // Given two numbers in the sign-and-magnitude representation,
00243   // returns the distance between them as an unsigned number.
00244   static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
00245                                                      const Bits &sam2) {
00246     const Bits biased1 = SignAndMagnitudeToBiased(sam1);
00247     const Bits biased2 = SignAndMagnitudeToBiased(sam2);
00248     return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
00249   }
00250 
00251   FloatingPointUnion u_;
00252 };
00253 
00254 #endif