00001 00002 // 00003 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas 00004 // Digital Ltd. LLC 00005 // 00006 // All rights reserved. 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are 00010 // met: 00011 // * Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // * Redistributions in binary form must reproduce the above 00014 // copyright notice, this list of conditions and the following disclaimer 00015 // in the documentation and/or other materials provided with the 00016 // distribution. 00017 // * Neither the name of Industrial Light & Magic nor the names of 00018 // its contributors may be used to endorse or promote products derived 00019 // from this software without specific prior written permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00025 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00027 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00028 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00029 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00030 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00031 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//

// Primary authors:
//     Florian Kainz <kainz@ilm.com>
//     Rod Bogart <rgb@ilm.com>

//---------------------------------------------------------------------------
//
//  Real16 (half) -- a 16-bit floating point number class:
//
//  In this file the class is named Real16; "half" below refers to
//  that type.
//
//  Type half can represent positive and negative numbers, whose
//  magnitude is between roughly 6.1e-5 and 6.5e+4, with a relative
//  error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
//  with an absolute error of 6.0e-8.  All integers from -2048 to
//  +2048 can be represented exactly.
//
//  Type half behaves (almost) like the built-in C++ floating point
//  types.  In arithmetic expressions, half, float and double can be
//  mixed freely.  Here are a few examples:
//
//      Real16 a (3.5);
//      float  b (a + sqrt (a));
//      a += b;
//      b += a;
//      b = a + 7;
//
//  Conversions from half to float are lossless; all half numbers
//  are exactly representable as floats.
//
//  Conversions from float to half may not preserve the float's
//  value exactly.  If a float is not representable as a half, the
//  float value is rounded to the nearest representable half.  If
//  a float value is exactly in the middle between the two closest
//  representable half values, then the float value is rounded to
//  the half with the greater magnitude.
//
//  Overflows during float-to-half conversions cause arithmetic
//  exceptions.  An overflow occurs when the float value to be
//  converted is too large to be represented as a half, or if the
//  float value is an infinity or a NAN.
00073 // 00074 // The implementation of type half makes the following assumptions 00075 // about the implementation of the built-in C++ types: 00076 // 00077 // float is an IEEE 754 single-precision number 00078 // sizeof (float) == 4 00079 // sizeof (unsigned int) == sizeof (float) 00080 // alignof (unsigned int) == alignof (float) 00081 // sizeof (unsigned short) == 2 00082 // 00083 //--------------------------------------------------------------------------- 00084 00085 #ifndef _OSGREAL16_H_ 00086 #define _OSGREAL16_H_ 00087 00088 #include <iostream> 00089 00090 #include "OSGConfig.h" 00091 00092 OSG_BEGIN_NAMESPACE 00093 00094 class OSG_BASE_DLLMAPPING Real16 00095 { 00096 public: 00097 00098 //------------- 00099 // Constructors 00100 //------------- 00101 00102 Real16 (); // no initialization 00103 Real16 (float f); 00104 00105 00106 //-------------------- 00107 // Conversion to float 00108 //-------------------- 00109 00110 operator float () const; 00111 00112 00113 //------------ 00114 // Unary minus 00115 //------------ 00116 00117 Real16 operator - () const; 00118 00119 00120 //----------- 00121 // Assignment 00122 //----------- 00123 00124 Real16 & operator = (Real16 h); 00125 Real16 & operator = (float f); 00126 00127 Real16 & operator += (Real16 h); 00128 Real16 & operator += (float f); 00129 00130 Real16 & operator -= (Real16 h); 00131 Real16 & operator -= (float f); 00132 00133 Real16 & operator *= (Real16 h); 00134 Real16 & operator *= (float f); 00135 00136 Real16 & operator /= (Real16 h); 00137 Real16 & operator /= (float f); 00138 00139 00140 //--------------------------------------------------------- 00141 // Round to n-bit precision (n should be between 0 and 10). 00142 // After rounding, the significand's 10-n least significant 00143 // bits will be zero. 
00144 //--------------------------------------------------------- 00145 00146 Real16 round (unsigned int n) const; 00147 00148 00149 //-------------------------------------------------------------------- 00150 // Classification: 00151 // 00152 // h.isFinite() returns true if h is a normalized number, 00153 // a denormalized number or zero 00154 // 00155 // h.isNormalized() returns true if h is a normalized number 00156 // 00157 // h.isDenormalized() returns true if h is a denormalized number 00158 // 00159 // h.isZero() returns true if h is zero 00160 // 00161 // h.isNan() returns true if h is a NAN 00162 // 00163 // h.isInfinity() returns true if h is a positive 00164 // or a negative infinity 00165 // 00166 // h.isNegative() returns true if the sign bit of h 00167 // is set (negative) 00168 //-------------------------------------------------------------------- 00169 00170 bool isFinite () const; 00171 bool isNormalized () const; 00172 bool isDenormalized () const; 00173 bool isZero () const; 00174 bool isNan () const; 00175 bool isInfinity () const; 00176 bool isNegative () const; 00177 00178 00179 //-------------------------------------------- 00180 // Special values 00181 // 00182 // posInf() returns +infinity 00183 // 00184 // negInf() returns +infinity 00185 // 00186 // qNan() returns a NAN with the bit 00187 // pattern 0111111111111111 00188 // 00189 // sNan() returns a NAN with the bit 00190 // pattern 0111110111111111 00191 //-------------------------------------------- 00192 00193 static Real16 posInf (); 00194 static Real16 negInf (); 00195 static Real16 qNan (); 00196 static Real16 sNan (); 00197 00198 00199 //-------------------------------------- 00200 // Access to the internal representation 00201 //-------------------------------------- 00202 00203 unsigned short bits () const; 00204 void setBits (unsigned short bits); 00205 00206 00207 public: 00208 00209 union uif 00210 { 00211 unsigned int i; 00212 float f; 00213 }; 00214 00215 private: 00216 
00217 static short convert (int i); 00218 static float overflow (); 00219 static bool selftest (); 00220 00221 unsigned short _h; 00222 00223 static const uif _toFloat[1 << 16]; 00224 static const unsigned short _eLut[1 << 9]; 00225 static const bool _itWorks; 00226 }; 00227 00228 00229 //----------- 00230 // Stream I/O 00231 //----------- 00232 00233 std::ostream & operator << (std::ostream &os, Real16 h); 00234 std::istream & operator >> (std::istream &is, Real16 &h); 00235 00236 00237 //---------- 00238 // Debugging 00239 //---------- 00240 00241 void printBits (std::ostream &os, Real16 h); 00242 void printBits (std::ostream &os, float f); 00243 void printBits (char c[19], Real16 h); 00244 void printBits (char c[35], float f); 00245 00246 00247 //------- 00248 // Limits 00249 //------- 00250 00251 //---------------------------------------------------------------- 00252 // Visual C++ will complain if these are not float constants, 00253 // but at least one other compiler (gcc 2.96) produces incorrect 00254 // results if they are. 
//----------------------------------------------------------------

#ifdef WIN32
#define REAL16_MIN          5.96046448e-08f // Smallest positive half

#define REAL16_NRM_MIN      6.10351562e-05f // Smallest positive normalized half

#define REAL16_MAX          65504.0f        // Largest positive half

#define REAL16_EPSILON      0.00097656f     // Smallest positive e for which
                                            // half (1.0 + e) != half (1.0)
#else
#define REAL16_MIN          5.96046448e-08  // Smallest positive half

#define REAL16_NRM_MIN      6.10351562e-05  // Smallest positive normalized half

#define REAL16_MAX          65504.0f        // Largest positive half

#define REAL16_EPSILON      0.00097656      // Smallest positive e for which
                                            // half (1.0 + e) != half (1.0)
#endif // WIN32

#define REAL16_MANT_DIG     11              // Number of digits in mantissa
                                            // (significand + hidden leading 1)

#define REAL16_DIG          2               // Number of base 10 digits that
                                            // can be represented without change
                                            // NOTE(review): the usual formula
                                            // floor ((MANT_DIG - 1) * log10 (2))
                                            // gives 3; the value 2 is kept
                                            // unchanged here.

#define REAL16_RADIX        2               // Base of the exponent

#define REAL16_MIN_EXP      -13             // Minimum negative integer such that
                                            // REAL16_RADIX raised to the power of
                                            // one less than that integer is a
                                            // normalized half

#define REAL16_MAX_EXP      16              // Maximum positive integer such that
                                            // REAL16_RADIX raised to the power of
                                            // one less than that integer is a
                                            // normalized half

#define REAL16_MIN_10_EXP   -4              // Minimum negative integer such
                                            // that 10 raised to that power is
                                            // a normalized half

#define REAL16_MAX_10_EXP   4               // Maximum positive integer such
                                            // that 10 raised to that power is
                                            // a normalized half


//---------------------------------------------------------------------------
//
// Implementation --
//
// Representation of a float:
//
//  We assume that a float, f, is an IEEE 754 single-precision
//  floating point number, whose bits are arranged as follows:
//
//      31 (msb)
//      |
//      | 30     23
//      | |      |
//      | |      | 22                    0 (lsb)
//      | |      | |                     |
//      X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
//
//      s e        m
//
//  s is the sign-bit, e is the exponent and m is the significand.
//
//  If e is between 1 and 254, f is a normalized number:
//
//              s    e-127
//      f = (-1)  * 2      * 1.m
//
//  If e is 0, and m is not zero, f is a denormalized number:
//
//              s    -126
//      f = (-1)  * 2      * 0.m
//
//  If e and m are both zero, f is zero:
//
//      f = 0.0
//
//  If e is 255, f is an "infinity" or "not a number" (NAN),
//  depending on whether m is zero or not.
//
//  Examples:
//
//      0 00000000 00000000000000000000000 = 0.0
//      0 01111110 00000000000000000000000 = 0.5
//      0 01111111 00000000000000000000000 = 1.0
//      0 10000000 00000000000000000000000 = 2.0
//      0 10000000 10000000000000000000000 = 3.0
//      1 10000101 11110000010000000000000 = -124.0625
//      0 11111111 00000000000000000000000 = +infinity
//      1 11111111 00000000000000000000000 = -infinity
//      0 11111111 10000000000000000000000 = NAN
//      1 11111111 11111111111111111111111 = NAN
//
// Representation of a half:
//
//  Here is the bit-layout for a half number, h:
//
//      15 (msb)
//      |
//      | 14  10
//      | |   |
//      | |   | 9        0 (lsb)
//      | |   | |        |
//      X XXXXX XXXXXXXXXX
//
//      s e     m
//
//  s is the sign-bit, e is the exponent and m is the significand.
00370 // 00371 // If e is between 1 and 30, h is a normalized number: 00372 // 00373 // s e-15 00374 // h = (-1) * 2 * 1.m 00375 // 00376 // If e is 0, and m is not zero, h is a denormalized number: 00377 // 00378 // S -14 00379 // h = (-1) * 2 * 0.m 00380 // 00381 // If e and m are both zero, h is zero: 00382 // 00383 // h = 0.0 00384 // 00385 // If e is 31, h is an "infinity" or "not a number" (NAN), 00386 // depending on whether m is zero or not. 00387 // 00388 // Examples: 00389 // 00390 // 0 00000 0000000000 = 0.0 00391 // 0 01110 0000000000 = 0.5 00392 // 0 01111 0000000000 = 1.0 00393 // 0 10000 0000000000 = 2.0 00394 // 0 10000 1000000000 = 3.0 00395 // 1 10101 1111000001 = -124.0625 00396 // 0 11111 0000000000 = +infinity 00397 // 1 11111 0000000000 = -infinity 00398 // 0 11111 1000000000 = NAN 00399 // 1 11111 1111111111 = NAN 00400 // 00401 // Conversion: 00402 // 00403 // Converting from a float to a half requires some non-trivial bit 00404 // manipulations. In some cases, this makes conversion relatively 00405 // slow, but the most common case is accelerated via table lookups. 00406 // 00407 // Converting back from a half to a float is easier because we don't 00408 // have to do any rounding. In addition, there are only 65536 00409 // different half numbers; we can convert each of those numbers once 00410 // and store the results in a table. Later, all conversions can be 00411 // done using only simple table lookups. 00412 // 00413 //--------------------------------------------------------------------------- 00414 00415 00416 //-------------------- 00417 // Simple constructors 00418 //-------------------- 00419 00420 inline 00421 Real16::Real16 () 00422 { 00423 // no initialization 00424 } 00425 00426 00427 //---------------------------- 00428 // Half-from-float constructor 00429 //---------------------------- 00430 00431 inline 00432 Real16::Real16 (float f) 00433 { 00434 if (f == 0) 00435 { 00436 // 00437 // Common special case - zero. 
00438 // For speed, we don't preserve the zero's sign. 00439 // 00440 00441 _h = 0; 00442 } 00443 else 00444 { 00445 // 00446 // We extract the combined sign and exponent, e, from our 00447 // floating-point number, f. Then we convert e to the sign 00448 // and exponent of the half number via a table lookup. 00449 // 00450 // For the most common case, where a normalized half is produced, 00451 // the table lookup returns a non-zero value; in this case, all 00452 // we have to do, is round f's significand to 10 bits and combine 00453 // the result with e. 00454 // 00455 // For all other cases (overflow, zeroes, denormalized numbers 00456 // resulting from underflow, infinities and NANs), the table 00457 // lookup returns zero, and we call a longer, non-inline function 00458 // to do the float-to-half conversion. 00459 // 00460 00461 uif x; 00462 00463 x.f = f; 00464 00465 register int e = (x.i >> 23) & 0x000001ff; 00466 00467 e = _eLut[e]; 00468 00469 if (e) 00470 { 00471 // 00472 // Simple case - round the significand and 00473 // combine it with the sign and exponent. 00474 // 00475 00476 _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13); 00477 } 00478 else 00479 { 00480 // 00481 // Difficult case - call a function. 00482 // 00483 00484 _h = convert (x.i); 00485 } 00486 } 00487 } 00488 00489 00490 //------------------------------------------ 00491 // Half-to-float conversion via table lookup 00492 //------------------------------------------ 00493 00494 inline 00495 Real16::operator float () const 00496 { 00497 return _toFloat[_h].f; 00498 } 00499 00500 00501 //------------------------- 00502 // Round to n-bit precision 00503 //------------------------- 00504 00505 inline Real16 00506 Real16::round (unsigned int n) const 00507 { 00508 // 00509 // Parameter check. 00510 // 00511 00512 if (n >= 10) 00513 return *this; 00514 00515 // 00516 // Disassemble h into the sign, s, 00517 // and the combined exponent and significand, e. 
00518 // 00519 00520 unsigned short s = _h & 0x8000; 00521 unsigned short e = _h & 0x7fff; 00522 00523 // 00524 // Round the exponent and significand to the nearest value 00525 // where ones occur only in the (10-n) most significant bits. 00526 // Note that the exponent adjusts automatically if rounding 00527 // up causes the significand to overflow. 00528 // 00529 00530 e >>= 9 - n; 00531 e += e & 1; 00532 e <<= 9 - n; 00533 00534 // 00535 // Check for exponent overflow. 00536 // 00537 00538 if (e >= 0x7c00) 00539 { 00540 // 00541 // Overflow occurred -- truncate instead of rounding. 00542 // 00543 00544 e = _h; 00545 e >>= 10 - n; 00546 e <<= 10 - n; 00547 } 00548 00549 // 00550 // Put the original sign bit back. 00551 // 00552 00553 Real16 h; 00554 h._h = s | e; 00555 00556 return h; 00557 } 00558 00559 00560 //----------------------- 00561 // Other inline functions 00562 //----------------------- 00563 00564 inline Real16 00565 Real16::operator - () const 00566 { 00567 Real16 h; 00568 h._h = _h ^ 0x8000; 00569 return h; 00570 } 00571 00572 00573 inline Real16 & 00574 Real16::operator = (Real16 h) 00575 { 00576 _h = h._h; 00577 return *this; 00578 } 00579 00580 00581 inline Real16 & 00582 Real16::operator = (float f) 00583 { 00584 *this = Real16 (f); 00585 return *this; 00586 } 00587 00588 00589 inline Real16 & 00590 Real16::operator += (Real16 h) 00591 { 00592 *this = Real16 (float (*this) + float (h)); 00593 return *this; 00594 } 00595 00596 00597 inline Real16 & 00598 Real16::operator += (float f) 00599 { 00600 *this = Real16 (float (*this) + f); 00601 return *this; 00602 } 00603 00604 00605 inline Real16 & 00606 Real16::operator -= (Real16 h) 00607 { 00608 *this = Real16 (float (*this) - float (h)); 00609 return *this; 00610 } 00611 00612 00613 inline Real16 & 00614 Real16::operator -= (float f) 00615 { 00616 *this = Real16 (float (*this) - f); 00617 return *this; 00618 } 00619 00620 00621 inline Real16 & 00622 Real16::operator *= (Real16 h) 00623 { 00624 
*this = Real16 (float (*this) * float (h)); 00625 return *this; 00626 } 00627 00628 00629 inline Real16 & 00630 Real16::operator *= (float f) 00631 { 00632 *this = Real16 (float (*this) * f); 00633 return *this; 00634 } 00635 00636 00637 inline Real16 & 00638 Real16::operator /= (Real16 h) 00639 { 00640 *this = Real16 (float (*this) / float (h)); 00641 return *this; 00642 } 00643 00644 00645 inline Real16 & 00646 Real16::operator /= (float f) 00647 { 00648 *this = Real16 (float (*this) / f); 00649 return *this; 00650 } 00651 00652 00653 inline bool 00654 Real16::isFinite () const 00655 { 00656 unsigned short e = (_h >> 10) & 0x001f; 00657 return e < 31; 00658 } 00659 00660 00661 inline bool 00662 Real16::isNormalized () const 00663 { 00664 unsigned short e = (_h >> 10) & 0x001f; 00665 return e > 0 && e < 31; 00666 } 00667 00668 00669 inline bool 00670 Real16::isDenormalized () const 00671 { 00672 unsigned short e = (_h >> 10) & 0x001f; 00673 unsigned short m = _h & 0x3ff; 00674 return e == 0 && m != 0; 00675 } 00676 00677 00678 inline bool 00679 Real16::isZero () const 00680 { 00681 return (_h & 0x7fff) == 0; 00682 } 00683 00684 00685 inline bool 00686 Real16::isNan () const 00687 { 00688 unsigned short e = (_h >> 10) & 0x001f; 00689 unsigned short m = _h & 0x3ff; 00690 return e == 31 && m != 0; 00691 } 00692 00693 00694 inline bool 00695 Real16::isInfinity () const 00696 { 00697 unsigned short e = (_h >> 10) & 0x001f; 00698 unsigned short m = _h & 0x3ff; 00699 return e == 31 && m == 0; 00700 } 00701 00702 00703 inline bool 00704 Real16::isNegative () const 00705 { 00706 return (_h & 0x8000) != 0; 00707 } 00708 00709 00710 inline Real16 00711 Real16::posInf () 00712 { 00713 Real16 h; 00714 h._h = 0x7c00; 00715 return h; 00716 } 00717 00718 00719 inline Real16 00720 Real16::negInf () 00721 { 00722 Real16 h; 00723 h._h = 0xfc00; 00724 return h; 00725 } 00726 00727 00728 inline Real16 00729 Real16::qNan () 00730 { 00731 Real16 h; 00732 h._h = 0x7fff; 00733 return h; 
00734 } 00735 00736 00737 inline Real16 00738 Real16::sNan () 00739 { 00740 Real16 h; 00741 h._h = 0x7dff; 00742 return h; 00743 } 00744 00745 00746 inline unsigned short 00747 Real16::bits () const 00748 { 00749 return _h; 00750 } 00751 00752 00753 inline void 00754 Real16::setBits (unsigned short bits) 00755 { 00756 _h = bits; 00757 } 00758 00759 OSG_END_NAMESPACE 00760 00761 #endif /* _OSGREAL16_H_ */