Skip to content

Commit

Permalink
Main: Bitwise - use floatToHalf from meshoptimizer
Browse files Browse the repository at this point in the history
for correct rounding behaviour
  • Loading branch information
paroj committed Jul 23, 2024
1 parent 0f29071 commit 9a8ecdb
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 99 deletions.
111 changes: 34 additions & 77 deletions OgreMain/include/OgreBitwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ namespace Ogre {
}

/** Convert a float32 to a float16 (NV_half_float)
Courtesy of OpenEXR
Courtesy of meshoptimizer
*/
static inline uint16 floatToHalf(float i)
{
Expand All @@ -313,48 +313,29 @@ namespace Ogre {
}
/** Converts float in uint32 format to a half in uint16 format
*/
static inline uint16 floatToHalfI(uint32 i)
static inline uint16 floatToHalfI(uint32 ui)
{
int s = (i >> 16) & 0x00008000;
int e = ((i >> 23) & 0x000000ff) - (127 - 15);
int m = i & 0x007fffff;

if (e <= 0)
{
if (e < -10)
{
return 0;
}
m = (m | 0x00800000) >> (1 - e);

return static_cast<uint16>(s | (m >> 13));
}
else if (e == 0xff - (127 - 15))
{
if (m == 0) // Inf
{
return static_cast<uint16>(s | 0x7c00);
}
else // NAN
{
m >>= 13;
return static_cast<uint16>(s | 0x7c00 | m | (m == 0));
}
}
else
{
if (e > 30) // Overflow
{
return static_cast<uint16>(s | 0x7c00);
}

return static_cast<uint16>(s | (e << 10) | (m >> 13));
}
int s = (ui >> 16) & 0x8000;
int em = ui & 0x7fffffff;

// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
int h = (em - (112 << 23) + (1 << 12)) >> 13;

// underflow: flush to zero; 113 encodes exponent -14
h = (em < (113 << 23)) ? 0 : h;

// overflow: infinity; 143 encodes exponent 16
h = (em >= (143 << 23)) ? 0x7c00 : h;

// NaN; note that we convert all types of NaN to qNaN
h = (em > (255 << 23)) ? 0x7e00 : h;

return (unsigned short)(s | h);
}

/**
* Convert a float16 (NV_half_float) to a float32
* Courtesy of OpenEXR
* Courtesy of meshoptimizer
*/
static inline float halfToFloat(uint16 y)
{
Expand All @@ -365,46 +346,22 @@ namespace Ogre {
/** Converts a half in uint16 format to a float
in uint32 format
*/
static inline uint32 halfToFloatI(uint16 y)
static inline uint32 halfToFloatI(uint16 h)
{
int s = (y >> 15) & 0x00000001;
int e = (y >> 10) & 0x0000001f;
int m = y & 0x000003ff;

if (e == 0)
{
if (m == 0) // Plus or minus zero
{
return s << 31;
}
else // Denormalized number -- renormalize it
{
while (!(m & 0x00000400))
{
m <<= 1;
e -= 1;
}

e += 1;
m &= ~0x00000400;
}
}
else if (e == 31)
{
if (m == 0) // Inf
{
return (s << 31) | 0x7f800000;
}
else // NaN
{
return (s << 31) | 0x7f800000 | (m << 13);
}
}

e = e + (127 - 15);
m = m << 13;

return (s << 31) | (e << 23) | m;
unsigned int s = unsigned(h & 0x8000) << 16;
int em = h & 0x7fff;

// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
int r = (em + (112 << 10)) << 13;

// denormal: flush to zero
r = (em < (1 << 10)) ? 0 : r;

// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
r += (em >= (31 << 10)) ? (112 << 23) : 0;

return s | r;
}


Expand Down
31 changes: 9 additions & 22 deletions Tests/OgreMain/src/PixelFormatTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,33 +78,20 @@ TEST_F(PixelFormatTests,IntegerPackUnpack)
TEST_F(PixelFormatTests,FloatPackUnpack)
{
// Float32
float data[4] = {1.0f, 2.0f, 3.0f, 4.0f};
ColourValue src{0.99999f, -0.9999f, 1.49999f, 1.99999f};
float r,g,b,a;
PixelUtil::unpackColour(&r, &g, &b, &a, PF_FLOAT32_RGBA, data);
EXPECT_EQ(r, 1.0f);
EXPECT_EQ(g, 2.0f);
EXPECT_EQ(b, 3.0f);
EXPECT_EQ(a, 4.0f);
PixelUtil::unpackColour(&r, &g, &b, &a, PF_FLOAT32_RGBA, src.ptr());
EXPECT_EQ(src, ColourValue(r, g, b, a));

// Float16
setupBoxes(PF_A8B8G8R8, PF_FLOAT16_RGBA);
mDst2.format = PF_A8B8G8R8;
unsigned int eob = mSrc.getWidth()*4;
ColourValue ref{1.0f, -1.0f, 1.5f, 2.0f}; // conversion to float16 should round to nearest
uint16 data2[4];
ColourValue dst;

PixelUtil::bulkPixelConversion(mSrc, mDst1);
PixelUtil::bulkPixelConversion(mDst1, mDst2);

// Locate errors
std::stringstream s;
unsigned int x;
for(x=0; x<eob; x++) {
if(mTemp2[x] != mRandomData[x])
s << std::hex << std::setw(2) << std::setfill('0') << (unsigned int) mRandomData[x]
<< "!= " << std::hex << std::setw(2) << std::setfill('0') << (unsigned int) mTemp2[x] << " ";
}
PixelUtil::bulkPixelConversion(src.ptr(), PF_FLOAT32_RGBA, data2, PF_FLOAT16_RGBA, 1);
PixelUtil::bulkPixelConversion(data2, PF_FLOAT16_RGBA, dst.ptr(), PF_FLOAT32_RGBA, 1);

// src and dst2 should match
EXPECT_TRUE(memcmp(mSrc.data, mDst2.data, eob) == 0) << "PF_FLOAT16_RGBA<->PF_A8B8G8R8 conversion was not lossless "+s.str();
EXPECT_EQ(dst, ref);
}
//--------------------------------------------------------------------------
// Pure 32 bit float precision brute force pixel conversion; for comparison
Expand Down

0 comments on commit 9a8ecdb

Please sign in to comment.