diff --git a/OgreMain/include/OgreBitwise.h b/OgreMain/include/OgreBitwise.h index 750f23dea7c..3ce2115546b 100644 --- a/OgreMain/include/OgreBitwise.h +++ b/OgreMain/include/OgreBitwise.h @@ -303,7 +303,7 @@ namespace Ogre { } /** Convert a float32 to a float16 (NV_half_float) - Courtesy of OpenEXR + Courtesy of meshoptimizer */ static inline uint16 floatToHalf(float i) { @@ -313,48 +313,29 @@ namespace Ogre { } /** Converts float in uint32 format to a a half in uint16 format */ - static inline uint16 floatToHalfI(uint32 i) + static inline uint16 floatToHalfI(uint32 ui) { - int s = (i >> 16) & 0x00008000; - int e = ((i >> 23) & 0x000000ff) - (127 - 15); - int m = i & 0x007fffff; - - if (e <= 0) - { - if (e < -10) - { - return 0; - } - m = (m | 0x00800000) >> (1 - e); - - return static_cast(s | (m >> 13)); - } - else if (e == 0xff - (127 - 15)) - { - if (m == 0) // Inf - { - return static_cast(s | 0x7c00); - } - else // NAN - { - m >>= 13; - return static_cast(s | 0x7c00 | m | (m == 0)); - } - } - else - { - if (e > 30) // Overflow - { - return static_cast(s | 0x7c00); - } - - return static_cast(s | (e << 10) | (m >> 13)); - } + int s = (ui >> 16) & 0x8000; + int em = ui & 0x7fffffff; + + // bias exponent and round to nearest; 112 is relative exponent bias (127-15) + int h = (em - (112 << 23) + (1 << 12)) >> 13; + + // underflow: flush to zero; 113 encodes exponent -14 + h = (em < (113 << 23)) ? 0 : h; + + // overflow: infinity; 143 encodes exponent 16 + h = (em >= (143 << 23)) ? 0x7c00 : h; + + // NaN; note that we convert all types of NaN to qNaN + h = (em > (255 << 23)) ? 0x7e00 : h; + + return (unsigned short)(s | h); } /** * Convert a float16 (NV_half_float) to a float32 - * Courtesy of OpenEXR + * Courtesy of meshoptimizer */ static inline float halfToFloat(uint16 y) { @@ -365,46 +346,22 @@ namespace Ogre { /** Converts a half in uint16 format to a float in uint32 format */ - static inline uint32 halfToFloatI(uint16 y) + static inline uint32 halfToFloatI(uint16 h) { - int s = (y >> 15) & 0x00000001; - int e = (y >> 10) & 0x0000001f; - int m = y & 0x000003ff; - - if (e == 0) - { - if (m == 0) // Plus or minus zero - { - return s << 31; - } - else // Denormalized number -- renormalize it - { - while (!(m & 0x00000400)) - { - m <<= 1; - e -= 1; - } - - e += 1; - m &= ~0x00000400; - } - } - else if (e == 31) - { - if (m == 0) // Inf - { - return (s << 31) | 0x7f800000; - } - else // NaN - { - return (s << 31) | 0x7f800000 | (m << 13); - } - } - - e = e + (127 - 15); - m = m << 13; - - return (s << 31) | (e << 23) | m; + unsigned int s = unsigned(h & 0x8000) << 16; + int em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? (112 << 23) : 0; + + return s | r; } diff --git a/Tests/OgreMain/src/PixelFormatTests.cpp b/Tests/OgreMain/src/PixelFormatTests.cpp index f06d06f3f40..10267ebde71 100644 --- a/Tests/OgreMain/src/PixelFormatTests.cpp +++ b/Tests/OgreMain/src/PixelFormatTests.cpp @@ -78,33 +78,20 @@ TEST_F(PixelFormatTests,IntegerPackUnpack) TEST_F(PixelFormatTests,FloatPackUnpack) { // Float32 - float data[4] = {1.0f, 2.0f, 3.0f, 4.0f}; + ColourValue src{0.99999f, -0.9999f, 1.49999f, 1.99999f}; float r,g,b,a; - PixelUtil::unpackColour(&r, &g, &b, &a, PF_FLOAT32_RGBA, data); - EXPECT_EQ(r, 1.0f); - EXPECT_EQ(g, 2.0f); - EXPECT_EQ(b, 3.0f); - EXPECT_EQ(a, 4.0f); + PixelUtil::unpackColour(&r, &g, &b, &a, PF_FLOAT32_RGBA, src.ptr()); + EXPECT_EQ(src, ColourValue(r, g, b, a)); // Float16 - setupBoxes(PF_A8B8G8R8, PF_FLOAT16_RGBA); - mDst2.format = PF_A8B8G8R8; - unsigned int eob = mSrc.getWidth()*4; + ColourValue ref{1.0f, -1.0f, 1.5f, 2.0f}; // conversion to float16 should round to nearest + uint16 data2[4]; + ColourValue dst; - PixelUtil::bulkPixelConversion(mSrc, mDst1); - PixelUtil::bulkPixelConversion(mDst1, mDst2); - - // Locate errors - std::stringstream s; - unsigned int x; - for(x=0; xPF_A8B8G8R8 conversion was not lossless "+s.str(); + EXPECT_EQ(dst, ref); } //-------------------------------------------------------------------------- // Pure 32 bit float precision brute force pixel conversion; for comparison