threemxl: half.cpp Source File

Go to the documentation of this file.
00001 
00002 //
00003 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
00004 // Digital Ltd. LLC
00005 //
00006 // All rights reserved.
00007 //
00008 // Redistribution and use in source and binary forms, with or without
00009 // modification, are permitted provided that the following conditions are
00010 // met:
00011 // *       Redistributions of source code must retain the above copyright
00012 // notice, this list of conditions and the following disclaimer.
00013 // *       Redistributions in binary form must reproduce the above
00014 // copyright notice, this list of conditions and the following disclaimer
00015 // in the documentation and/or other materials provided with the
00016 // distribution.
00017 // *       Neither the name of Industrial Light & Magic nor the names of
00018 // its contributors may be used to endorse or promote products derived
00019 // from this software without specific prior written permission.
00020 //
00021 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00025 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00027 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00028 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00029 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00030 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00031 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 //
00034 
00035 // Primary authors:
00036 //     Florian Kainz <kainz@ilm.com>
00037 //     Rod Bogart <rgb@ilm.com>
00038 
00039 
00040 //---------------------------------------------------------------------------
00041 //
00042 //      class half --
00043 //      implementation of non-inline members
00044 //
00045 //---------------------------------------------------------------------------
#include <assert.h>
#include <string.h>
#include <threemxl/externals/half/half.h>
00048 
00049 using namespace std;
00050 
//-------------------------------------------------------------
// Lookup tables for half-to-float and float-to-half conversion
//
// The initializers are not written out in this file.  Each
// definition below stops at the "=" sign and is completed by
// the header that is #included on the following line, so those
// headers have to supply the whole brace-enclosed initializer
// list together with the terminating semicolon.
//
// _toFloat has 1 << 16 elements, which is the number of distinct
// 16-bit patterns; _eLut has 1 << 9 elements.
// NOTE(review): how the tables are indexed is decided by code
// that is not visible from this file (presumably the inline
// members in half.h) -- confirm there before changing a size.
//-------------------------------------------------------------

const half::uif half::_toFloat[1 << 16] =
    #include <threemxl/externals/half/toFloat.h>
const unsigned short half::_eLut[1 << 9] =
    #include <threemxl/externals/half/eLut.h>
00059 
00060 //-----------------------------------------------
00061 // Overflow handler for float-to-half conversion;
00062 // generates a hardware floating-point overflow,
00063 // which may be trapped by the operating system.
00064 //-----------------------------------------------
00065 
00066 float
00067 half::overflow ()
00068 {
00069     volatile float f = 1e10;
00070 
00071     for (int i = 0; i < 10; i++)
00072         f *= f;                         // this will overflow before
00073                                         // the forloop terminates
00074     return f;
00075 }
00076 
00077 
00078 //-----------------------------------------------------
00079 // Float-to-half conversion -- general case, including
00080 // zeroes, denormalized numbers and exponent overflows.
00081 //-----------------------------------------------------
00082 
00083 short
00084 half::convert (int i)
00085 {
00086     //
00087     // Our floating point number, f, is represented by the bit
00088     // pattern in integer i.  Disassemble that bit pattern into
00089     // the sign, s, the exponent, e, and the significand, m.
00090     // Shift s into the position where it will go in in the
00091     // resulting half number.
00092     // Adjust e, accounting for the different exponent bias
00093     // of float and half (127 versus 15).
00094     //
00095 
00096     register int s =  (i >> 16) & 0x00008000;
00097     register int e = ((i >> 23) & 0x000000ff) - (127 - 15);
00098     register int m =   i        & 0x007fffff;
00099 
00100     //
00101     // Now reassemble s, e and m into a half:
00102     //
00103 
00104     if (e <= 0)
00105     {
00106         if (e < -10)
00107         {
00108             //
00109             // E is less than -10.  The absolute value of f is
00110             // less than HALF_MIN (f may be a small normalized
00111             // float, a denormalized float or a zero).
00112             //
00113             // We convert f to a half zero with the same sign as f.
00114             //
00115 
00116             return s;
00117         }
00118 
00119         //
00120         // E is between -10 and 0.  F is a normalized float
00121         // whose magnitude is less than HALF_NRM_MIN.
00122         //
00123         // We convert f to a denormalized half.
00124         //
00125 
00126         m = (m | 0x00800000) >> (1 - e);
00127 
00128         //
00129         // Round to nearest, round "0.5" up.
00130         //
00131         // Rounding may cause the significand to overflow and make
00132         // our number normalized.  Because of the way a half's bits
00133         // are laid out, we don't have to treat this case separately;
00134         // the code below will handle it correctly.
00135         //
00136 
00137         if (m &  0x00001000)
00138             m += 0x00002000;
00139 
00140         //
00141         // Assemble the half from s, e (zero) and m.
00142         //
00143 
00144         return s | (m >> 13);
00145     }
00146     else if (e == 0xff - (127 - 15))
00147     {
00148         if (m == 0)
00149         {
00150             //
00151             // F is an infinity; convert f to a half
00152             // infinity with the same sign as f.
00153             //
00154 
00155             return s | 0x7c00;
00156         }
00157         else
00158         {
00159             //
00160             // F is a NAN; we produce a half NAN that preserves
00161             // the sign bit and the 10 leftmost bits of the
00162             // significand of f, with one exception: If the 10
00163             // leftmost bits are all zero, the NAN would turn
00164             // into an infinity, so we have to set at least one
00165             // bit in the significand.
00166             //
00167 
00168             m >>= 13;
00169             return s | 0x7c00 | m | (m == 0);
00170         }
00171     }
00172     else
00173     {
00174         //
00175         // E is greater than zero.  F is a normalized float.
00176         // We try to convert f to a normalized half.
00177         //
00178 
00179         //
00180         // Round to nearest, round "0.5" up
00181         //
00182 
00183         if (m &  0x00001000)
00184         {
00185             m += 0x00002000;
00186 
00187             if (m & 0x00800000)
00188             {
00189                 m =  0;         // overflow in significand,
00190                 e += 1;         // adjust exponent
00191             }
00192         }
00193 
00194         //
00195         // Handle exponent overflow
00196         //
00197 
00198         if (e > 30)
00199         {
00200             overflow ();        // Cause a hardware floating point overflow;
00201             return s | 0x7c00;  // if this returns, the half becomes an
00202         }                       // infinity with the same sign as f.
00203 
00204         //
00205         // Assemble the half from s, e and m.
00206         //
00207 
00208         return s | (e << 10) | (m >> 13);
00209     }
00210 }
00211 
00212 
00213 //---------------------
00214 // Stream I/O operators
00215 //---------------------
00216 
00217 ostream &
00218 operator << (ostream &os, half h)
00219 {
00220     os << float (h);
00221     return os;
00222 }
00223 
00224 
00225 istream &
00226 operator >> (istream &is, half &h)
00227 {
00228     float f;
00229     is >> f;
00230     h = half (f);
00231     return is;
00232 }
00233 
00234 
00235 //---------------------------------------
00236 // Functions to print the bit-layout of
00237 // floats and halfs, mostly for debugging
00238 //---------------------------------------
00239 
00240 void
00241 printBits (ostream &os, half h)
00242 {
00243     unsigned short b = h.bits();
00244 
00245     for (int i = 15; i >= 0; i--)
00246     {
00247         os << (((b >> i) & 1)? '1': '0');
00248 
00249         if (i == 15 || i == 10)
00250             os << ' ';
00251     }
00252 }
00253 
00254 
void
printBits (std::ostream &os, float f)
{
    //
    // Writes the 32 bits of f, most significant bit first, with
    // a blank after bit 31 and another after bit 23:
    // 1 bit, blank, 8 bits, blank, 23 bits.
    //
    // The bit pattern is obtained with memcpy.  Storing the float
    // in one member of a union and reading it back through another
    // member is undefined behavior in C++, even though many
    // compilers tolerate it.
    //

    unsigned int bits = 0;
    assert (sizeof (bits) == sizeof (f));
    memcpy (&bits, &f, sizeof (f));

    for (int i = 31; i >= 0; i--)
    {
        os << (((bits >> i) & 1)? '1': '0');

        if (i == 31 || i == 23)
            os << ' ';
    }
}
00269 
00270 
00271 void
00272 printBits (char c[19], half h)
00273 {
00274     unsigned short b = h.bits();
00275 
00276     for (int i = 15, j = 0; i >= 0; i--, j++)
00277     {
00278         c[j] = (((b >> i) & 1)? '1': '0');
00279 
00280         if (i == 15 || i == 10)
00281             c[++j] = ' ';
00282     }
00283 
00284     c[18] = 0;
00285 }
00286 
00287 
void
printBits (char c[35], float f)
{
    //
    // Fills c with the 32 bits of f, most significant bit first,
    // with a blank after bit 31 and another after bit 23, followed
    // by a terminating zero: 34 characters plus the terminator.
    //
    // The bit pattern is obtained with memcpy.  Storing the float
    // in one member of a union and reading it back through another
    // member is undefined behavior in C++, even though many
    // compilers tolerate it.
    //

    unsigned int bits = 0;
    assert (sizeof (bits) == sizeof (f));
    memcpy (&bits, &f, sizeof (f));

    for (int i = 31, j = 0; i >= 0; i--, j++)
    {
        c[j] = (((bits >> i) & 1)? '1': '0');

        if (i == 31 || i == 23)
            c[++j] = ' ';               // separator takes an extra slot
    }

    c[34] = 0;
}