//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 // 00034 00035 // Primary authors: 00036 // Florian Kainz <kainz@ilm.com> 00037 // Rod Bogart <rgb@ilm.com> 00038 00039 00040 //--------------------------------------------------------------------------- 00041 // 00042 // class half -- 00043 // implementation of non-inline members 00044 // 00045 //--------------------------------------------------------------------------- 00046 #include <assert.h> 00047 #include <threemxl/externals/half/half.h> 00048 00049 using namespace std; 00050 00051 //------------------------------------------------------------- 00052 // Lookup tables for half-to-float and float-to-half conversion 00053 //------------------------------------------------------------- 00054 00055 const half::uif half::_toFloat[1 << 16] = 00056 #include <threemxl/externals/half/toFloat.h> 00057 const unsigned short half::_eLut[1 << 9] = 00058 #include <threemxl/externals/half/eLut.h> 00059 00060 //----------------------------------------------- 00061 // Overflow handler for float-to-half conversion; 00062 // generates a hardware floating-point overflow, 00063 // which may be trapped by the operating system. 00064 //----------------------------------------------- 00065 00066 float 00067 half::overflow () 00068 { 00069 volatile float f = 1e10; 00070 00071 for (int i = 0; i < 10; i++) 00072 f *= f; // this will overflow before 00073 // the forloop terminates 00074 return f; 00075 } 00076 00077 00078 //----------------------------------------------------- 00079 // Float-to-half conversion -- general case, including 00080 // zeroes, denormalized numbers and exponent overflows. 00081 //----------------------------------------------------- 00082 00083 short 00084 half::convert (int i) 00085 { 00086 // 00087 // Our floating point number, f, is represented by the bit 00088 // pattern in integer i. Disassemble that bit pattern into 00089 // the sign, s, the exponent, e, and the significand, m. 
00090 // Shift s into the position where it will go in in the 00091 // resulting half number. 00092 // Adjust e, accounting for the different exponent bias 00093 // of float and half (127 versus 15). 00094 // 00095 00096 register int s = (i >> 16) & 0x00008000; 00097 register int e = ((i >> 23) & 0x000000ff) - (127 - 15); 00098 register int m = i & 0x007fffff; 00099 00100 // 00101 // Now reassemble s, e and m into a half: 00102 // 00103 00104 if (e <= 0) 00105 { 00106 if (e < -10) 00107 { 00108 // 00109 // E is less than -10. The absolute value of f is 00110 // less than HALF_MIN (f may be a small normalized 00111 // float, a denormalized float or a zero). 00112 // 00113 // We convert f to a half zero with the same sign as f. 00114 // 00115 00116 return s; 00117 } 00118 00119 // 00120 // E is between -10 and 0. F is a normalized float 00121 // whose magnitude is less than HALF_NRM_MIN. 00122 // 00123 // We convert f to a denormalized half. 00124 // 00125 00126 m = (m | 0x00800000) >> (1 - e); 00127 00128 // 00129 // Round to nearest, round "0.5" up. 00130 // 00131 // Rounding may cause the significand to overflow and make 00132 // our number normalized. Because of the way a half's bits 00133 // are laid out, we don't have to treat this case separately; 00134 // the code below will handle it correctly. 00135 // 00136 00137 if (m & 0x00001000) 00138 m += 0x00002000; 00139 00140 // 00141 // Assemble the half from s, e (zero) and m. 00142 // 00143 00144 return s | (m >> 13); 00145 } 00146 else if (e == 0xff - (127 - 15)) 00147 { 00148 if (m == 0) 00149 { 00150 // 00151 // F is an infinity; convert f to a half 00152 // infinity with the same sign as f. 
00153 // 00154 00155 return s | 0x7c00; 00156 } 00157 else 00158 { 00159 // 00160 // F is a NAN; we produce a half NAN that preserves 00161 // the sign bit and the 10 leftmost bits of the 00162 // significand of f, with one exception: If the 10 00163 // leftmost bits are all zero, the NAN would turn 00164 // into an infinity, so we have to set at least one 00165 // bit in the significand. 00166 // 00167 00168 m >>= 13; 00169 return s | 0x7c00 | m | (m == 0); 00170 } 00171 } 00172 else 00173 { 00174 // 00175 // E is greater than zero. F is a normalized float. 00176 // We try to convert f to a normalized half. 00177 // 00178 00179 // 00180 // Round to nearest, round "0.5" up 00181 // 00182 00183 if (m & 0x00001000) 00184 { 00185 m += 0x00002000; 00186 00187 if (m & 0x00800000) 00188 { 00189 m = 0; // overflow in significand, 00190 e += 1; // adjust exponent 00191 } 00192 } 00193 00194 // 00195 // Handle exponent overflow 00196 // 00197 00198 if (e > 30) 00199 { 00200 overflow (); // Cause a hardware floating point overflow; 00201 return s | 0x7c00; // if this returns, the half becomes an 00202 } // infinity with the same sign as f. 00203 00204 // 00205 // Assemble the half from s, e and m. 
00206 // 00207 00208 return s | (e << 10) | (m >> 13); 00209 } 00210 } 00211 00212 00213 //--------------------- 00214 // Stream I/O operators 00215 //--------------------- 00216 00217 ostream & 00218 operator << (ostream &os, half h) 00219 { 00220 os << float (h); 00221 return os; 00222 } 00223 00224 00225 istream & 00226 operator >> (istream &is, half &h) 00227 { 00228 float f; 00229 is >> f; 00230 h = half (f); 00231 return is; 00232 } 00233 00234 00235 //--------------------------------------- 00236 // Functions to print the bit-layout of 00237 // floats and halfs, mostly for debugging 00238 //--------------------------------------- 00239 00240 void 00241 printBits (ostream &os, half h) 00242 { 00243 unsigned short b = h.bits(); 00244 00245 for (int i = 15; i >= 0; i--) 00246 { 00247 os << (((b >> i) & 1)? '1': '0'); 00248 00249 if (i == 15 || i == 10) 00250 os << ' '; 00251 } 00252 } 00253 00254 00255 void 00256 printBits (ostream &os, float f) 00257 { 00258 half::uif x; 00259 x.f = f; 00260 00261 for (int i = 31; i >= 0; i--) 00262 { 00263 os << (((x.i >> i) & 1)? '1': '0'); 00264 00265 if (i == 31 || i == 23) 00266 os << ' '; 00267 } 00268 } 00269 00270 00271 void 00272 printBits (char c[19], half h) 00273 { 00274 unsigned short b = h.bits(); 00275 00276 for (int i = 15, j = 0; i >= 0; i--, j++) 00277 { 00278 c[j] = (((b >> i) & 1)? '1': '0'); 00279 00280 if (i == 15 || i == 10) 00281 c[++j] = ' '; 00282 } 00283 00284 c[18] = 0; 00285 } 00286 00287 00288 void 00289 printBits (char c[35], float f) 00290 { 00291 half::uif x; 00292 x.f = f; 00293 00294 for (int i = 31, j = 0; i >= 0; i--, j++) 00295 { 00296 c[j] = (((x.i >> i) & 1)? '1': '0'); 00297 00298 if (i == 31 || i == 23) 00299 c[++j] = ' '; 00300 } 00301 00302 c[34] = 0; 00303 }