threemxl: half.h Source File

Go to the documentation of this file.
00001 
00002 //
00003 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
00004 // Digital Ltd. LLC
00005 // 
00006 // All rights reserved.
00007 // 
00008 // Redistribution and use in source and binary forms, with or without
00009 // modification, are permitted provided that the following conditions are
00010 // met:
00011 // *       Redistributions of source code must retain the above copyright
00012 // notice, this list of conditions and the following disclaimer.
00013 // *       Redistributions in binary form must reproduce the above
00014 // copyright notice, this list of conditions and the following disclaimer
00015 // in the documentation and/or other materials provided with the
00016 // distribution.
00017 // *       Neither the name of Industrial Light & Magic nor the names of
00018 // its contributors may be used to endorse or promote products derived
00019 // from this software without specific prior written permission. 
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00025 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00027 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00028 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00029 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00030 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00031 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 //
00034 
00035 // Primary authors:
00036 //     Florian Kainz <kainz@ilm.com>
00037 //     Rod Bogart <rgb@ilm.com>
00038 
00039 //---------------------------------------------------------------------------
00040 //
00041 //      half -- a 16-bit floating point number class:
00042 //
00043 //      Type half can represent positive and negative numbers whose
00044 //      magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
00045 //      error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
00046 //      with an absolute error of 6.0e-8.  All integers from -2048 to
00047 //      +2048 can be represented exactly.
00048 //
00049 //      Type half behaves (almost) like the built-in C++ floating point
00050 //      types.  In arithmetic expressions, half, float and double can be
00051 //      mixed freely.  Here are a few examples:
00052 //
00053 //          half a (3.5);
00054 //          float b (a + sqrt (a));
00055 //          a += b;
00056 //          b += a;
00057 //          b = a + 7;
00058 //
00059 //      Conversions from half to float are lossless; all half numbers
00060 //      are exactly representable as floats.
00061 //
00062 //      Conversions from float to half may not preserve the float's
00063 //      value exactly.  If a float is not representable as a half, the
00064 //      float value is rounded to the nearest representable half.  If
00065 //      a float value is exactly in the middle between the two closest
00066 //      representable half values, then the float value is rounded to
00067 //      the half with the greater magnitude.
00068 //
00069 //      Overflows during float-to-half conversions cause arithmetic
00070 //      exceptions.  An overflow occurs when the float value to be
00071 //      converted is too large to be represented as a half, or if the
00072 //      float value is an infinity or a NAN.
00073 //
00074 //      The implementation of type half makes the following assumptions
00075 //      about the implementation of the built-in C++ types:
00076 //
00077 //          float is an IEEE 754 single-precision number
00078 //          sizeof (float) == 4
00079 //          sizeof (unsigned int) == sizeof (float)
00080 //          alignof (unsigned int) == alignof (float)
00081 //          sizeof (unsigned short) == 2
00082 //
00083 //---------------------------------------------------------------------------
00084 
00085 #ifndef _HALF_H_
00086 #define _HALF_H_
00087 
00088 #include <iostream>
00089 
00090 class half
00091 {
00092   public:
00093     // A 16-bit floating point value, stored as its raw bit pattern in _h.
00094     //-------------
00095     // Constructors
00096     //-------------
00097 
00098     half ();                    // no initialization: the value is indeterminate
00099     half (float f);             // rounds f to the nearest representable half
00100 
00101 
00102     //--------------------
00103     // Conversion to float
00104     //--------------------
00105 
00106     operator            float () const;         // exact; done by table lookup
00107 
00108 
00109     //------------
00110     // Unary minus
00111     //------------
00112 
00113     half                operator - () const;    // flips the sign bit only
00114 
00115 
00116     //-----------
00117     // Assignment
00118     //-----------
00119     // (the compound forms compute in float, then round back to half)
00120     half &              operator = (half  h);
00121     half &              operator = (float f);
00122 
00123     half &              operator += (half  h);
00124     half &              operator += (float f);
00125 
00126     half &              operator -= (half  h);
00127     half &              operator -= (float f);
00128 
00129     half &              operator *= (half  h);
00130     half &              operator *= (float f);
00131 
00132     half &              operator /= (half  h);
00133     half &              operator /= (float f);
00134 
00135 
00136     //---------------------------------------------------------
00137     // Round to n-bit precision (n should be between 0 and 10).
00138     // After rounding, the significand's 10-n least significant
00139     // bits will be zero; n >= 10 returns the value unchanged.
00140     //---------------------------------------------------------
00141 
00142     half                round (unsigned int n) const;
00143 
00144 
00145     //--------------------------------------------------------------------
00146     // Classification:
00147     //
00148     //  h.isFinite()            returns true if h is a normalized number,
00149     //                          a denormalized number or zero
00150     //
00151     //  h.isNormalized()        returns true if h is a normalized number
00152     //
00153     //  h.isDenormalized()      returns true if h is a denormalized number
00154     //
00155     //  h.isZero()              returns true if h is zero
00156     //
00157     //  h.isNan()               returns true if h is a NAN
00158     //
00159     //  h.isInfinity()          returns true if h is a positive
00160     //                          or a negative infinity
00161     //
00162     //  h.isNegative()          returns true if the sign bit of h
00163     //                          is set (negative)
00164     //--------------------------------------------------------------------
00165 
00166     bool                isFinite () const;
00167     bool                isNormalized () const;
00168     bool                isDenormalized () const;
00169     bool                isZero () const;
00170     bool                isNan () const;
00171     bool                isInfinity () const;
00172     bool                isNegative () const;
00173 
00174 
00175     //--------------------------------------------
00176     // Special values
00177     //
00178     //  posInf()        returns +infinity
00179     //
00180     //  negInf()        returns -infinity
00181     //
00182     //  qNan()          returns a NAN with the bit
00183     //                  pattern 0111111111111111
00184     //
00185     //  sNan()          returns a NAN with the bit
00186     //                  pattern 0111110111111111
00187     //--------------------------------------------
00188 
00189     static half         posInf ();
00190     static half         negInf ();
00191     static half         qNan ();
00192     static half         sNan ();
00193 
00194 
00195     //--------------------------------------
00196     // Access to the internal representation
00197     //--------------------------------------
00198 
00199     unsigned short      bits () const;                  // returns _h as is
00200     void                setBits (unsigned short bits);  // overwrites _h as is
00201 
00202 
00203   public:
00204     // The same 32 bits viewed either as a float or as an unsigned int.
00205     union uif
00206     {
00207         unsigned int    i;
00208         float           f;
00209     };
00210 
00211   private:
00212 
00213     static short        convert (int i);        // float-to-half for the cases _eLut maps to 0
00214     static float        overflow ();            // NOTE(review): presumably raises the overflow exception; defined elsewhere
00215 
00216     unsigned short      _h;                     // 1 sign bit, 5 exponent bits, 10 significand bits
00217 
00218     //---------------------------------------------------
00219     // Windows dynamic libraries don't like static
00220     // member variables.
00221     //---------------------------------------------------
00222 #ifndef OPENEXR_DLL
00223     static const uif            _toFloat[1 << 16];      // float value of every half bit pattern
00224     static const unsigned short _eLut[1 << 9];          // half sign+exponent, indexed by float sign+exponent
00225 #endif
00226 };
00227 
00228 #if defined(OPENEXR_DLL)
00229     // Windows DLL builds: the lookup tables are namespace-scope
00230     // globals instead of static class members.  They are exported
00231     // when HALF_EXPORTS is defined and imported otherwise.
00232     #if defined(HALF_EXPORTS)
00233         extern __declspec(dllexport) half::uif          _toFloat[1 << 16];
00234         extern __declspec(dllexport) unsigned short     _eLut[1 << 9];
00235     #else
00236         extern __declspec(dllimport) half::uif          _toFloat[1 << 16];
00237         extern __declspec(dllimport) unsigned short     _eLut[1 << 9];
00238     #endif
00239 #endif
00240 
00241 
00242 //-----------
00243 // Stream I/O
00244 //-----------
00245 
00246 std::ostream &          operator << (std::ostream &os, half  h);   // declaration only; not defined in this header
00247 std::istream &          operator >> (std::istream &is, half &h);   // declaration only; h is the output parameter
00248 
00249 
00250 //----------
00251 // Debugging
00252 //----------
00253 
00254 void                    printBits   (std::ostream &os, half  h);   // declaration only; not defined in this header
00255 void                    printBits   (std::ostream &os, float f);   // declaration only; not defined in this header
00256 void                    printBits   (char  c[19], half  h);        // c decays to char *; the 19 is not enforced
00257 void                    printBits   (char  c[35], float f);        // c decays to char *; the 35 is not enforced
00258 
00259 
00260 //-------------------------------------------------------------------------
00261 // Limits
00262 //
00263 // Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
00264 // constants, but at least one other compiler (gcc 2.96) produces incorrect
00265 // results if they are.
00266 //-------------------------------------------------------------------------
00267 
00268 #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
00269   // Visual C++: float constants (f suffix).
00270   #define HALF_MIN      5.96046448e-08f // Smallest positive half
00271 
00272   #define HALF_NRM_MIN  6.10351562e-05f // Smallest positive normalized half
00273 
00274   #define HALF_MAX      65504.0f        // Largest positive half
00275 
00276   #define HALF_EPSILON  0.00097656f     // Smallest positive e for which
00277                                         // half (1.0 + e) != half (1.0)
00278 #else
00279   // Other compilers: the same values as double constants (no suffix).
00280   #define HALF_MIN      5.96046448e-08  // Smallest positive half
00281 
00282   #define HALF_NRM_MIN  6.10351562e-05  // Smallest positive normalized half
00283 
00284   #define HALF_MAX      65504.0         // Largest positive half
00285 
00286   #define HALF_EPSILON  0.00097656      // Smallest positive e for which
00287                                         // half (1.0 + e) != half (1.0)
00288 #endif
00289 
00290 
00291 #define HALF_MANT_DIG   11              // Number of digits in mantissa
00292                                         // (significand + hidden leading 1)
00293 
00294 #define HALF_DIG        2               // Number of base 10 digits that
00295                                         // can be represented without change
00296 
00297 #define HALF_RADIX      2               // Base of the exponent
00298 
00299 #define HALF_MIN_EXP    -13             // Minimum negative integer such that
00300                                         // HALF_RADIX raised to the power of
00301                                         // one less than that integer is a
00302                                         // normalized half
00303 
00304 #define HALF_MAX_EXP    16              // Maximum positive integer such that
00305                                         // HALF_RADIX raised to the power of
00306                                         // one less than that integer is a
00307                                         // normalized half
00308 
00309 #define HALF_MIN_10_EXP -4              // Minimum negative integer such
00310                                         // that 10 raised to that power is
00311                                         // a normalized half
00312 
00313 #define HALF_MAX_10_EXP 4               // Maximum positive integer such
00314                                         // that 10 raised to that power is
00315                                         // a normalized half
00316 
00317 
00318 //---------------------------------------------------------------------------
00319 //
00320 // Implementation --
00321 //
00322 // Representation of a float:
00323 //
00324 //      We assume that a float, f, is an IEEE 754 single-precision
00325 //      floating point number, whose bits are arranged as follows:
00326 //
00327 //          31 (msb)
00328 //          | 
00329 //          | 30     23
00330 //          | |      | 
00331 //          | |      | 22                    0 (lsb)
00332 //          | |      | |                     |
00333 //          X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
00334 //
00335 //          s e        m
00336 //
00337 //      s is the sign-bit, e is the exponent and m is the significand.
00338 //
00339 //      If e is between 1 and 254, f is a normalized number:
00340 //
00341 //                  s    e-127
00342 //          f = (-1)  * 2      * 1.m
00343 //
00344 //      If e is 0, and m is not zero, f is a denormalized number:
00345 //
00346 //                  s    -126
00347 //          f = (-1)  * 2      * 0.m
00348 //
00349 //      If e and m are both zero, f is zero:
00350 //
00351 //          f = 0.0
00352 //
00353 //      If e is 255, f is an "infinity" or "not a number" (NAN),
00354 //      depending on whether m is zero or not.
00355 //
00356 //      Examples:
00357 //
00358 //          0 00000000 00000000000000000000000 = 0.0
00359 //          0 01111110 00000000000000000000000 = 0.5
00360 //          0 01111111 00000000000000000000000 = 1.0
00361 //          0 10000000 00000000000000000000000 = 2.0
00362 //          0 10000000 10000000000000000000000 = 3.0
00363 //          1 10000101 11110000010000000000000 = -124.0625
00364 //          0 11111111 00000000000000000000000 = +infinity
00365 //          1 11111111 00000000000000000000000 = -infinity
00366 //          0 11111111 10000000000000000000000 = NAN
00367 //          1 11111111 11111111111111111111111 = NAN
00368 //
00369 // Representation of a half:
00370 //
00371 //      Here is the bit-layout for a half number, h:
00372 //
00373 //          15 (msb)
00374 //          | 
00375 //          | 14  10
00376 //          | |   |
00377 //          | |   | 9        0 (lsb)
00378 //          | |   | |        |
00379 //          X XXXXX XXXXXXXXXX
00380 //
00381 //          s e     m
00382 //
00383 //      s is the sign-bit, e is the exponent and m is the significand.
00384 //
00385 //      If e is between 1 and 30, h is a normalized number:
00386 //
00387 //                  s    e-15
00388 //          h = (-1)  * 2     * 1.m
00389 //
00390 //      If e is 0, and m is not zero, h is a denormalized number:
00391 //
00392 //                  s    -14
00393 //          h = (-1)  * 2     * 0.m
00394 //
00395 //      If e and m are both zero, h is zero:
00396 //
00397 //          h = 0.0
00398 //
00399 //      If e is 31, h is an "infinity" or "not a number" (NAN),
00400 //      depending on whether m is zero or not.
00401 //
00402 //      Examples:
00403 //
00404 //          0 00000 0000000000 = 0.0
00405 //          0 01110 0000000000 = 0.5
00406 //          0 01111 0000000000 = 1.0
00407 //          0 10000 0000000000 = 2.0
00408 //          0 10000 1000000000 = 3.0
00409 //          1 10101 1111000001 = -124.0625
00410 //          0 11111 0000000000 = +infinity
00411 //          1 11111 0000000000 = -infinity
00412 //          0 11111 1000000000 = NAN
00413 //          1 11111 1111111111 = NAN
00414 //
00415 // Conversion:
00416 //
00417 //      Converting from a float to a half requires some non-trivial bit
00418 //      manipulations.  In some cases, this makes conversion relatively
00419 //      slow, but the most common case is accelerated via table lookups.
00420 //
00421 //      Converting back from a half to a float is easier because we don't
00422 //      have to do any rounding.  In addition, there are only 65536
00423 //      different half numbers; we can convert each of those numbers once
00424 //      and store the results in a table.  Later, all conversions can be
00425 //      done using only simple table lookups.
00426 //
00427 //---------------------------------------------------------------------------
00428 
00429 
00430 //--------------------
00431 // Simple constructors
00432 //--------------------
00433 
00434 inline
00435 half::half ()
00436 {
00437     // Intentionally empty: _h is left uninitialized.
00438 }
00439 
00440 
00441 //----------------------------
00442 // Half-from-float constructor
00443 //----------------------------
00444 
00445 inline
00446 half::half (float f)
00447 {
00448     uif x;                      // gives access to f's bit pattern as x.i
00449 
00450     x.f = f;
00451 
00452     if (f == 0)
00453     {
00454         //
00455         // Common special case - zero.  The upper 16 bits of the float
00456         // are 0x0000 or 0x8000, so the zero's sign bit is preserved.
00457         //
00458 
00459         _h = (x.i >> 16);
00460     }
00461     else
00462     {
00463         //
00464         // We extract the combined sign and exponent, e, from our
00465         // floating-point number, f.  Then we convert e to the sign
00466         // and exponent of the half number via a table lookup.
00467         //
00468         // For the most common case, where a normalized half is produced,
00469         // the table lookup returns a non-zero value; in this case, all
00470         // we have to do is round f's significand to 10 bits and combine
00471         // the result with e.
00472         //
00473         // For all other cases (overflow, zeroes, denormalized numbers
00474         // resulting from underflow, infinities and NANs), the table
00475         // lookup returns zero, and we call a longer, non-inline function
00476         // to do the float-to-half conversion.
00477         //
00478         // e starts as 9 bits: f's sign bit followed by its 8 exponent bits.
00479         int e = (x.i >> 23) & 0x000001ff;
00480 
00481         e = _eLut[e];
00482 
00483         if (e)
00484         {
00485             // Simple case - add half a unit in the last place (bit 12)
00486             // to the 23-bit significand, drop the low 13 bits, and add
00487             // the result to the sign and exponent; a rounding carry
00488             // moves into the exponent field.
00489 
00490             _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13);
00491         }
00492         else
00493         {
00494             //
00495             // Difficult case - call a function.
00496             //
00497 
00498             _h = convert (x.i);
00499         }
00500     }
00501 }
00502 
00503 
00504 //------------------------------------------
00505 // Half-to-float conversion via table lookup
00506 //------------------------------------------
00507 
00508 inline
00509 half::operator float () const
00510 {
00511     return _toFloat[_h].f;      // the 16-bit pattern indexes the table directly
00512 }
00513 
00514 
00515 //-------------------------
00516 // Round to n-bit precision
00517 //-------------------------
00518 
00519 inline half
00520 half::round (unsigned int n) const
00521 {
00522     //
00523     // With n >= 10 every significand bit is kept: nothing to do.
00524     //
00525 
00526     if (n >= 10)
00527         return *this;
00528 
00529     //
00530     // Split the bit pattern into the sign bit and the magnitude
00531     // (exponent and significand together, 15 bits).
00532     //
00533 
00534     const unsigned short sign = _h & 0x8000;
00535     unsigned short       mag  = _h & 0x7fff;
00536 
00537     //
00538     // Round the magnitude to the nearest value whose (10-n) least
00539     // significant bits are zero: drop the low 9-n bits, round the
00540     // remaining lowest bit upward, and shift back.  A carry out of
00541     // the significand increments the exponent automatically.
00542     //
00543 
00544     const unsigned int drop = 9 - n;
00545     const unsigned int kept = mag >> drop;
00546     mag = (kept + (kept & 1)) << drop;
00547 
00548     //
00549     // Magnitudes of 0x7c00 and above have exponent 31 (infinity
00550     // or NAN), either because rounding carried that far or because
00551     // the value was already there: truncate the original instead.
00552     //
00553 
00554     if (mag >= 0x7c00)
00555     {
00556         const unsigned int cut = 10 - n;
00557         mag = (_h >> cut) << cut;
00558     }
00559 
00560     //
00561     // Reattach the sign bit and wrap the pattern in a half.
00562     //
00563 
00564     half rounded;
00565     rounded._h = sign | mag;
00566 
00567     return rounded;
00568 }
00572 
00573 
00574 //-----------------------
00575 // Other inline functions
00576 //-----------------------
00577 
00578 inline half
00579 half::operator - () const
00580 {
00581     half negated (*this);       // start from a copy of this value
00582     negated._h ^= 0x8000;       // flip the sign bit, leave the rest alone
00583     return negated;
00584 }
00585 
00586 
00587 inline half &
00588 half::operator = (half h)
00589 {
00590     _h = h._h;                  // copy the raw 16-bit pattern; no conversion
00591     return *this;               // allows chained assignment
00592 }
00593 
00594 
00595 inline half &
00596 half::operator = (float f)
00597 {
00598     // Convert f to a half, then reuse the half-to-half assignment.
00599     return *this = half (f);
00600 }
00601 
00602 
00603 inline half &
00604 half::operator += (half h)
00605 {
00606     // Add in float precision, then round the sum back to half.
00607     return *this = half (float (*this) + float (h));
00608 }
00609 
00610 
00611 inline half &
00612 half::operator += (float f)
00613 {
00614     // Add in float precision, then round the sum back to half.
00615     return *this = half (float (*this) + f);
00616 }
00617 
00618 
00619 inline half &
00620 half::operator -= (half h)
00621 {
00622     // Subtract in float precision, then round the difference back to half.
00623     return *this = half (float (*this) - float (h));
00624 }
00625 
00626 
00627 inline half &
00628 half::operator -= (float f)
00629 {
00630     // Subtract in float precision, then round the difference back to half.
00631     return *this = half (float (*this) - f);
00632 }
00633 
00634 
00635 inline half &
00636 half::operator *= (half h)
00637 {
00638     // Multiply in float precision, then round the product back to half.
00639     return *this = half (float (*this) * float (h));
00640 }
00641 
00642 
00643 inline half &
00644 half::operator *= (float f)
00645 {
00646     // Multiply in float precision, then round the product back to half.
00647     return *this = half (float (*this) * f);
00648 }
00649 
00650 
00651 inline half &
00652 half::operator /= (half h)
00653 {
00654     // Divide in float precision, then round the quotient back to half.
00655     return *this = half (float (*this) / float (h));
00656 }
00657 
00658 
00659 inline half &
00660 half::operator /= (float f)
00661 {
00662     // Divide in float precision, then round the quotient back to half.
00663     return *this = half (float (*this) / f);
00664 }
00665 
00666 
00667 inline bool
00668 half::isFinite () const
00669 {
00670     // Only infinities and NANs have all five exponent bits set.
00671     return (_h & 0x7c00) != 0x7c00;
00672 }
00673 
00674 
00675 inline bool
00676 half::isNormalized () const
00677 {
00678     const unsigned short expBits = _h & 0x7c00;     // exponent field, left in place
00679     return expBits != 0 && expBits != 0x7c00;       // neither 0 nor 31
00680 }
00681 
00682 
00683 inline bool
00684 half::isDenormalized () const
00685 {
00686     // Zero exponent field with a non-zero significand, i.e. a non-zero
00687     // magnitude below the smallest normalized pattern (0x0400).
00688     return (_h & 0x7fff) != 0 && (_h & 0x7fff) < 0x0400;
00689 }
00690 
00691 
00692 inline bool
00693 half::isZero () const
00694 {
00695     return (_h & 0x7fff) == 0;  // sign bit masked off, so -0 counts as zero
00696 }
00697 
00698 
00699 inline bool
00700 half::isNan () const
00701 {
00702     // Exponent 31 with a non-zero significand: the magnitude lies
00703     // strictly above the infinity pattern 0x7c00.
00704     return (_h & 0x7fff) > 0x7c00;
00705 }
00706 
00707 
00708 inline bool
00709 half::isInfinity () const
00710 {
00711     // Exponent 31 with a zero significand, either sign:
00712     // the magnitude is exactly 0x7c00.
00713     return (_h & 0x7fff) == 0x7c00;
00714 }
00715 
00716 
00717 inline bool     
00718 half::isNegative () const
00719 {
00720     return (_h & 0x8000) != 0;  // tests the sign bit only, so -0 is negative too
00721 }
00722 
00723 
00724 inline half
00725 half::posInf ()
00726 {
00727     half inf;
00728     inf._h = 0x1f << 10;                // sign 0, exponent 31, significand 0
00729     return inf;
00730 }
00731 
00732 
00733 inline half
00734 half::negInf ()
00735 {
00736     half inf;
00737     inf._h = 0x8000 | (0x1f << 10);     // sign 1, exponent 31, significand 0
00738     return inf;
00739 }
00740 
00741 
00742 inline half
00743 half::qNan ()
00744 {
00745     half nan;
00746     nan._h = (0x1f << 10) | 0x03ff;     // exponent 31, all significand bits set
00747     return nan;
00748 }
00749 
00750 
00751 inline half
00752 half::sNan ()
00753 {
00754     half nan;
00755     nan._h = (0x1f << 10) | 0x01ff;     // exponent 31, top significand bit clear
00756     return nan;
00757 }
00758 
00759 
00760 inline unsigned short
00761 half::bits () const
00762 {
00763     return _h;                  // the stored pattern, unmodified
00764 }
00765 
00766 
00767 inline void
00768 half::setBits (unsigned short bits)
00769 {
00770     _h = bits;                  // stored as given; no validation or conversion
00771 }
00772 
00773 #undef HALF_EXPORT_CONST        // NOTE(review): never defined in this listing; possibly a leftover
00774 
00775 #endif
00776