//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

// Primary authors:
//     Florian Kainz <kainz@ilm.com>
//     Rod Bogart <rgb@ilm.com>

//---------------------------------------------------------------------------
//
//    half -- a 16-bit floating point number class:
//
//    Type half can represent positive and negative numbers whose
//    magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
//    error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
//    with an absolute error of 6.0e-8.  All integers from -2048 to
//    +2048 can be represented exactly.
//
//    Type half behaves (almost) like the built-in C++ floating point
//    types.  In arithmetic expressions, half, float and double can be
//    mixed freely.  Here are a few examples:
//
//        half a (3.5);
//        float b (a + sqrt (a));
//        a += b;
//        b += a;
//        b = a + 7;
//
//    Conversions from half to float are lossless; all half numbers
//    are exactly representable as floats.
//
//    Conversions from float to half may not preserve the float's
//    value exactly.  If a float is not representable as a half, the
//    float value is rounded to the nearest representable half.  If
//    a float value is exactly in the middle between the two closest
//    representable half values, then the float value is rounded to
//    the half with the greater magnitude.
//
//    Overflows during float-to-half conversions cause arithmetic
//    exceptions.  An overflow occurs when the float value to be
//    converted is too large to be represented as a half, or if the
//    float value is an infinity or a NAN.
//
//    The implementation of type half makes the following assumptions
//    about the implementation of the built-in C++ types:
//
//        float is an IEEE 754 single-precision number
//        sizeof (float) == 4
//        sizeof (unsigned int) == sizeof (float)
//        alignof (unsigned int) == alignof (float)
//        sizeof (unsigned short) == 2
//
//---------------------------------------------------------------------------

#ifndef _HALF_H_
#define _HALF_H_

#include <iostream>

class half
{
  public:

    //-------------
    // Constructors
    //-------------

    half ();                    // no initialization
    half (float f);


    //--------------------
    // Conversion to float
    //--------------------

    operator            float () const;


    //------------
    // Unary minus
    //------------

    half                operator - () const;


    //-----------
    // Assignment
    //-----------

    half &              operator = (half h);
    half &              operator = (float f);

    half &              operator += (half h);
    half &              operator += (float f);

    half &              operator -= (half h);
    half &              operator -= (float f);

    half &              operator *= (half h);
    half &              operator *= (float f);

    half &              operator /= (half h);
    half &              operator /= (float f);


    //---------------------------------------------------------
    // Round to n-bit precision (n should be between 0 and 10).
    // After rounding, the significand's 10-n least significant
    // bits will be zero.  A NAN is returned unchanged.
    //---------------------------------------------------------

    half                round (unsigned int n) const;


    //--------------------------------------------------------------------
    // Classification:
    //
    //  h.isFinite()            returns true if h is a normalized number,
    //                          a denormalized number or zero
    //
    //  h.isNormalized()        returns true if h is a normalized number
    //
    //  h.isDenormalized()      returns true if h is a denormalized number
    //
    //  h.isZero()              returns true if h is zero
    //
    //  h.isNan()               returns true if h is a NAN
    //
    //  h.isInfinity()          returns true if h is a positive
    //                          or a negative infinity
    //
    //  h.isNegative()          returns true if the sign bit of h
    //                          is set (negative)
    //--------------------------------------------------------------------

    bool                isFinite () const;
    bool                isNormalized () const;
    bool                isDenormalized () const;
    bool                isZero () const;
    bool                isNan () const;
    bool                isInfinity () const;
    bool                isNegative () const;


    //--------------------------------------------
    // Special values
    //
    //  posInf()        returns +infinity
    //
    //  negInf()        returns -infinity
    //
    //  qNan()          returns a NAN with the bit
    //                  pattern 0111111111111111
    //
    //  sNan()          returns a NAN with the bit
    //                  pattern 0111110111111111
    //--------------------------------------------

    static half         posInf ();
    static half         negInf ();
    static half         qNan ();
    static half         sNan ();


    //--------------------------------------
    // Access to the internal representation
    //--------------------------------------

    unsigned short      bits () const;
    void                setBits (unsigned short bits);


  public:

    //--------------------------------------------------------
    // Gives access to the bit pattern of a float; this is the
    // element type of the half-to-float lookup table.
    //--------------------------------------------------------

    union uif
    {
        unsigned int    i;
        float           f;
    };

  private:

    static short        convert (int i);
    static float        overflow ();

    unsigned short      _h;

    //---------------------------------------------------
    // Windows dynamic libraries don't like static
    // member variables.
    //---------------------------------------------------
#ifndef OPENEXR_DLL
    static const uif            _toFloat[1 << 16];
    static const unsigned short _eLut[1 << 9];
#endif
};

#if defined(OPENEXR_DLL)
    //--------------------------------------
    // Lookup tables defined for Windows DLL
    //--------------------------------------
    #if defined(HALF_EXPORTS)
        extern __declspec(dllexport) half::uif          _toFloat[1 << 16];
        extern __declspec(dllexport) unsigned short     _eLut[1 << 9];
    #else
        extern __declspec(dllimport) half::uif          _toFloat[1 << 16];
        extern __declspec(dllimport) unsigned short     _eLut[1 << 9];
    #endif
#endif


//-----------
// Stream I/O
//-----------

std::ostream &          operator << (std::ostream &os, half h);
std::istream &          operator >> (std::istream &is, half &h);


//----------
// Debugging
//----------

void                    printBits (std::ostream &os, half h);
void                    printBits (std::ostream &os, float f);
void                    printBits (char c[19], half h);
void                    printBits (char c[35], float f);


//-------------------------------------------------------------------------
// Limits
//
// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
// constants, but at least one other compiler (gcc 2.96) produces incorrect
// results if they are.
//-------------------------------------------------------------------------

#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

  #define HALF_MIN      5.96046448e-08f // Smallest positive half

  #define HALF_NRM_MIN  6.10351562e-05f // Smallest positive normalized half

  #define HALF_MAX      65504.0f        // Largest positive half

  #define HALF_EPSILON  0.00097656f     // Smallest positive e for which
                                        // half (1.0 + e) != half (1.0)
#else

  #define HALF_MIN      5.96046448e-08  // Smallest positive half

  #define HALF_NRM_MIN  6.10351562e-05  // Smallest positive normalized half

  #define HALF_MAX      65504.0         // Largest positive half

  #define HALF_EPSILON  0.00097656      // Smallest positive e for which
                                        // half (1.0 + e) != half (1.0)
#endif


#define HALF_MANT_DIG   11              // Number of digits in mantissa
                                        // (significand + hidden leading 1)

#define HALF_DIG        2               // Number of base 10 digits that
                                        // can be represented without change

#define HALF_RADIX      2               // Base of the exponent

#define HALF_MIN_EXP    -13             // Minimum negative integer such that
                                        // HALF_RADIX raised to the power of
                                        // one less than that integer is a
                                        // normalized half

#define HALF_MAX_EXP    16              // Maximum positive integer such that
                                        // HALF_RADIX raised to the power of
                                        // one less than that integer is a
                                        // normalized half

#define HALF_MIN_10_EXP -4              // Minimum negative integer such
                                        // that 10 raised to that power is
                                        // a normalized half

#define HALF_MAX_10_EXP 4               // Maximum positive integer such
                                        // that 10 raised to that power is
                                        // a normalized half


//---------------------------------------------------------------------------
//
// Implementation --
//
// Representation of a float:
//
//      We assume that a float, f, is an IEEE 754 single-precision
//      floating point number, whose bits are arranged as follows:
//
//          31 (msb)
//          |
//          | 30     23
//          | |      |
//          | |      | 22                    0 (lsb)
//          | |      | |                     |
//          X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
//
//          s e        m
//
//      s is the sign-bit, e is the exponent and m is the significand.
//
//      If e is between 1 and 254, f is a normalized number:
//
//                  s    e-127
//          f = (-1)  * 2      * 1.m
//
//      If e is 0, and m is not zero, f is a denormalized number:
//
//                  s    -126
//          f = (-1)  * 2      * 0.m
//
//      If e and m are both zero, f is zero:
//
//          f = 0.0
//
//      If e is 255, f is an "infinity" or "not a number" (NAN),
//      depending on whether m is zero or not.
//
//      Examples:
//
//          0 00000000 00000000000000000000000 = 0.0
//          0 01111110 00000000000000000000000 = 0.5
//          0 01111111 00000000000000000000000 = 1.0
//          0 10000000 00000000000000000000000 = 2.0
//          0 10000000 10000000000000000000000 = 3.0
//          1 10000101 11110000010000000000000 = -124.0625
//          0 11111111 00000000000000000000000 = +infinity
//          1 11111111 00000000000000000000000 = -infinity
//          0 11111111 10000000000000000000000 = NAN
//          1 11111111 11111111111111111111111 = NAN
//
// Representation of a half:
//
//      Here is the bit-layout for a half number, h:
//
//          15 (msb)
//          |
//          | 14  10
//          | |   |
//          | |   | 9        0 (lsb)
//          | |   | |        |
//          X XXXXX XXXXXXXXXX
//
//          s e     m
//
//      s is the sign-bit, e is the exponent and m is the significand.
//
//      If e is between 1 and 30, h is a normalized number:
//
//                  s    e-15
//          h = (-1)  * 2     * 1.m
//
//      If e is 0, and m is not zero, h is a denormalized number:
//
//                  s    -14
//          h = (-1)  * 2     * 0.m
//
//      If e and m are both zero, h is zero:
//
//          h = 0.0
//
//      If e is 31, h is an "infinity" or "not a number" (NAN),
//      depending on whether m is zero or not.
//
//      Examples:
//
//          0 00000 0000000000 = 0.0
//          0 01110 0000000000 = 0.5
//          0 01111 0000000000 = 1.0
//          0 10000 0000000000 = 2.0
//          0 10000 1000000000 = 3.0
//          1 10101 1111000001 = -124.0625
//          0 11111 0000000000 = +infinity
//          1 11111 0000000000 = -infinity
//          0 11111 1000000000 = NAN
//          1 11111 1111111111 = NAN
//
// Conversion:
//
//      Converting from a float to a half requires some non-trivial bit
//      manipulations.  In some cases, this makes conversion relatively
//      slow, but the most common case is accelerated via table lookups.
//
//      Converting back from a half to a float is easier because we don't
//      have to do any rounding.  In addition, there are only 65536
//      different half numbers; we can convert each of those numbers once
//      and store the results in a table.  Later, all conversions can be
//      done using only simple table lookups.
//
//---------------------------------------------------------------------------


//--------------------
// Simple constructors
//--------------------

inline
half::half ()
{
    // no initialization
}


//----------------------------
// Half-from-float constructor
//----------------------------

inline
half::half (float f)
{
    uif x;

    x.f = f;

    if (f == 0)
    {
        //
        // Common special case - zero.
        // Preserve the zero's sign bit.
        //

        _h = (x.i >> 16);
    }
    else
    {
        //
        // We extract the combined sign and exponent, e, from our
        // floating-point number, f.  Then we convert e to the sign
        // and exponent of the half number via a table lookup.
        //
        // For the most common case, where a normalized half is produced,
        // the table lookup returns a non-zero value; in this case, all
        // we have to do is round f's significand to 10 bits and combine
        // the result with e.
        //
        // For all other cases (overflow, zeroes, denormalized numbers
        // resulting from underflow, infinities and NANs), the table
        // lookup returns zero, and we call a longer, non-inline function
        // to do the float-to-half conversion.
        //
        // Note: e is a plain int; the "register" storage class is no
        // longer valid C++ (it was removed in C++17).
        //

        int e = (x.i >> 23) & 0x000001ff;

        e = _eLut[e];

        if (e)
        {
            //
            // Simple case - round the significand and
            // combine it with the sign and exponent.
            // Adding 0x1000 (half of the 13 bits that are dropped)
            // before shifting rounds ties away from zero.
            //

            _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13);
        }
        else
        {
            //
            // Difficult case - call a function.
            //

            _h = convert (x.i);
        }
    }
}


//------------------------------------------
// Half-to-float conversion via table lookup
//------------------------------------------

inline
half::operator float () const
{
    return _toFloat[_h].f;
}


//-------------------------
// Round to n-bit precision
//-------------------------

inline half
half::round (unsigned int n) const
{
    //
    // Parameter check: with n >= 10 no significand bits are dropped.
    //

    if (n >= 10)
        return *this;

    //
    // A NAN has no value to round.  Return it unchanged; otherwise the
    // overflow fallback below would truncate its significand bits and
    // could turn the NAN into an infinity.
    //

    if ((_h & 0x7c00) == 0x7c00 && (_h & 0x03ff) != 0)
        return *this;

    //
    // Disassemble h into the sign, s,
    // and the combined exponent and significand, e.
    //

    unsigned short s = _h & 0x8000;
    unsigned short e = _h & 0x7fff;

    //
    // Round the exponent and significand to the nearest value
    // whose 10-n least significant bits are zero.
    // Note that the exponent adjusts automatically if rounding
    // up causes the significand to overflow.
    //

    e >>= 9 - n;
    e  += e & 1;
    e <<= 9 - n;

    //
    // Check for exponent overflow.
    //

    if (e >= 0x7c00)
    {
        //
        // Overflow occurred -- truncate instead of rounding.
        // (e temporarily holds the sign bit too; s is or'ed
        // back in below, so the result is the same.)
        //

        e = _h;
        e >>= 10 - n;
        e <<= 10 - n;
    }

    //
    // Put the original sign bit back.
    //

    half h;
    h._h = s | e;

    return h;
}


//-----------------------
// Other inline functions
//-----------------------

inline half
half::operator - () const
{
    half h;
    h._h = _h ^ 0x8000;
    return h;
}


inline half &
half::operator = (half h)
{
    _h = h._h;
    return *this;
}


inline half &
half::operator = (float f)
{
    *this = half (f);
    return *this;
}


inline half &
half::operator += (half h)
{
    *this = half (float (*this) + float (h));
    return *this;
}


inline half &
half::operator += (float f)
{
    *this = half (float (*this) + f);
    return *this;
}


inline half &
half::operator -= (half h)
{
    *this = half (float (*this) - float (h));
    return *this;
}


inline half &
half::operator -= (float f)
{
    *this = half (float (*this) - f);
    return *this;
}


inline half &
half::operator *= (half h)
{
    *this = half (float (*this) * float (h));
    return *this;
}


inline half &
half::operator *= (float f)
{
    *this = half (float (*this) * f);
    return *this;
}


inline half &
half::operator /= (half h)
{
    *this = half (float (*this) / float (h));
    return *this;
}


inline half &
half::operator /= (float f)
{
    *this = half (float (*this) / f);
    return *this;
}


inline bool
half::isFinite () const
{
    unsigned short e = (_h >> 10) & 0x001f;
    return e < 31;
}


inline bool
half::isNormalized () const
{
    unsigned short e = (_h >> 10) & 0x001f;
    return e > 0 && e < 31;
}


inline bool
half::isDenormalized () const
{
    unsigned short e = (_h >> 10) & 0x001f;
    unsigned short m =  _h & 0x3ff;
    return e == 0 && m != 0;
}


inline bool
half::isZero () const
{
    return (_h & 0x7fff) == 0;
}


inline bool
half::isNan () const
{
    unsigned short e = (_h >> 10) & 0x001f;
    unsigned short m =  _h & 0x3ff;
    return e == 31 && m != 0;
}


inline bool
half::isInfinity () const
{
    unsigned short e = (_h >> 10) & 0x001f;
    unsigned short m =  _h & 0x3ff;
    return e == 31 && m == 0;
}


inline bool
half::isNegative () const
{
    return (_h & 0x8000) != 0;
}


inline half
half::posInf ()
{
    half h;
    h._h = 0x7c00;
    return h;
}


inline half
half::negInf ()
{
    half h;
    h._h = 0xfc00;
    return h;
}


inline half
half::qNan ()
{
    half h;
    h._h = 0x7fff;
    return h;
}


inline half
half::sNan ()
{
    half h;
    h._h = 0x7dff;
    return h;
}


inline unsigned short
half::bits () const
{
    return _h;
}


inline void
half::setBits (unsigned short bits)
{
    _h = bits;
}

#undef HALF_EXPORT_CONST

#endif