Fixed Rgb9E5F decoding. Removed union cast in favor of bx::bitCast.

This commit is contained in:
Бранимир Караџић
2024-11-19 22:23:44 -08:00
parent 98b36f44ca
commit 04464c4188
5 changed files with 91 additions and 76 deletions

View File

@@ -24,12 +24,11 @@ namespace bx
// Offsets `_ptr` by `_extra` bytes and passes the resulting address through
// bx::alignUp with `_align`.
//
// _ptr   - base pointer.
// _extra - number of bytes added to the address before aligning (space for header).
// _align - alignment handed to bx::alignUp; note it is narrowed to int32_t.
//
// Returns the address produced by bx::alignUp, converted back to a pointer.
// The pointer <-> integer conversions go through bitCast rather than a union.
inline void* alignPtr(void* _ptr, size_t _extra, size_t _align)
{
	const uintptr_t addr      = bitCast<uintptr_t>(_ptr);
	const uintptr_t unaligned = addr + _extra; // space for header
	const uintptr_t aligned   = bx::alignUp(unaligned, int32_t(_align) );

	return bitCast<void*>(aligned);
}
inline void* alloc(AllocatorI* _allocator, size_t _size, size_t _align, const Location& _location)

View File

@@ -21,9 +21,7 @@ namespace bx
inline int32_t toSnorm(float _value, float _scale)
{
return int32_t(round(
clamp(_value, -1.0f, 1.0f) * _scale)
);
return int32_t(round(clamp(_value, -1.0f, 1.0f) * _scale) );
}
inline float fromSnorm(int32_t _value, float _scale)
@@ -721,46 +719,49 @@ namespace bx
memCopy(_dst, _src, 8);
}
template<int32_t MantissaBits, int32_t ExpBits>
template<int32_t MantissaBitsT, int32_t ExpBitsT>
inline void encodeRgbE(float* _dst, const float* _src)
{
// Reference(s):
// - https://web.archive.org/web/20181126040035/https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
//
const int32_t expMax = (1<<ExpBits) - 1;
const int32_t expBias = (1<<(ExpBits - 1) ) - 1;
const int32_t expMax = (1<< ExpBitsT ) - 1;
const int32_t expBias = (1<<(ExpBitsT - 1) ) - 1;
const float sharedExpMax = float(expMax) / float(expMax + 1) * float(1 << (expMax - expBias) );
const float rr = clamp(_src[0], 0.0f, sharedExpMax);
const float gg = clamp(_src[1], 0.0f, sharedExpMax);
const float bb = clamp(_src[2], 0.0f, sharedExpMax);
const float mm = max(rr, gg, bb);
union { float ff; uint32_t ui; } cast = { mm };
int32_t expShared = int32_t(uint32_imax(uint32_t(-expBias-1), ( ( (cast.ui>>23) & 0xff) - 127) ) ) + 1 + expBias;
float denom = pow(2.0f, float(expShared - expBias - MantissaBits) );
const uint32_t mm_as_ui = bitCast<uint32_t>(mm);
if ( (1<<MantissaBits) == int32_t(round(mm/denom) ) )
int32_t expShared = int32_t(max(uint32_t(-expBias-1), ( ( (mm_as_ui>>23) & 0xff) - 127) ) ) + 1 + expBias;
float denom = pow(2.0f, float(expShared - expBias - MantissaBitsT) );
if ( (1<<MantissaBitsT) == int32_t(round(mm/denom) ) )
{
denom *= 2.0f;
++expShared;
}
const float invDenom = 1.0f/denom;
const float invDenom = rcpSafe(denom);
_dst[0] = round(rr * invDenom);
_dst[1] = round(gg * invDenom);
_dst[2] = round(bb * invDenom);
_dst[3] = float(expShared);
}
// Decodes a shared-exponent RGB value.
//
// _src[0..2] - per-channel mantissas (integer values stored as float).
// _src[3]    - biased shared exponent (stored as float).
// _dst[0..2] - decoded red/green/blue.
//
// MantissaBitsT / ExpBitsT select the number of mantissa and exponent bits.
template<int32_t MantissaBitsT, int32_t ExpBitsT>
inline void decodeRgbE(float* _dst, const float* _src)
{
	const int32_t expBias = (1<<(ExpBitsT - 1) ) - 1;

	// Inverse of encodeRgbE, which divides each channel by
	// 2^(expShared - expBias - MantissaBits):
	//
	//   value = mantissa * 2^(exponent - expBias - MantissaBits)
	//
	const float exponent = _src[3]-float(expBias+MantissaBitsT);
	const float scale    = pow(2.0f, exponent);

	_dst[0] = _src[0] * scale;
	_dst[1] = _src[1] * scale;
	_dst[2] = _src[2] * scale;
}
// RGB9E5F
@@ -779,12 +780,12 @@ namespace bx
inline void unpackRgb9E5F(float* _dst, const void* _src)
{
uint32_t packed = *( (const uint32_t*)_src);
const uint32_t packed = *( (const uint32_t*)_src);
float tmp[4];
tmp[0] = float( ( (packed ) & 0x1ff) ) / 511.0f;
tmp[1] = float( ( (packed>> 9) & 0x1ff) ) / 511.0f;
tmp[2] = float( ( (packed>>18) & 0x1ff) ) / 511.0f;
tmp[0] = float( ( (packed ) & 0x1ff) );
tmp[1] = float( ( (packed>> 9) & 0x1ff) );
tmp[2] = float( ( (packed>>18) & 0x1ff) );
tmp[3] = float( ( (packed>>27) & 0x1f) );
decodeRgbE<9, 5>(_dst, tmp);

View File

@@ -649,15 +649,15 @@ namespace bx
// Pointer overload: converts `_ptr` to its integer address with bitCast and
// forwards to the uintptr_t overload of isAligned together with `_align`.
template<typename Ty>
inline BX_CONSTEXPR_FUNC bool isAligned(Ty* _ptr, int32_t _align)
{
	const uintptr_t addr = bitCast<uintptr_t>(_ptr);
	return isAligned(addr, _align);
}
// Pointer-to-const overload: converts `_ptr` to its integer address with
// bitCast and forwards to the uintptr_t overload of isAligned together with
// `_align`.
template<typename Ty>
inline BX_CONSTEXPR_FUNC bool isAligned(const Ty* _ptr, int32_t _align)
{
	const uintptr_t addr = bitCast<uintptr_t>(_ptr);
	return isAligned(addr, _align);
}
template<typename Ty>
@@ -670,17 +670,17 @@ namespace bx
// Pointer overload: converts `_ptr` to its integer address, runs it through
// the integer alignDown with `_align`, and converts the result back to Ty*.
template<typename Ty>
inline BX_CONSTEXPR_FUNC Ty* alignDown(Ty* _ptr, int32_t _align)
{
	const uintptr_t addr    = bitCast<uintptr_t>(_ptr);
	const uintptr_t aligned = alignDown(addr, _align);
	return bitCast<Ty*>(aligned);
}
// Pointer-to-const overload: converts `_ptr` to its integer address, runs it
// through the integer alignDown with `_align`, and converts the result back
// to const Ty*.
template<typename Ty>
inline BX_CONSTEXPR_FUNC const Ty* alignDown(const Ty* _ptr, int32_t _align)
{
	const uintptr_t addr    = bitCast<uintptr_t>(_ptr);
	const uintptr_t aligned = alignDown(addr, _align);
	return bitCast<const Ty*>(aligned);
}
template<typename Ty>
@@ -693,23 +693,22 @@ namespace bx
// Pointer overload: converts `_ptr` to its integer address, runs it through
// the integer alignUp with `_align`, and converts the result back to Ty*.
template<typename Ty>
inline BX_CONSTEXPR_FUNC Ty* alignUp(Ty* _ptr, int32_t _align)
{
	const uintptr_t addr    = bitCast<uintptr_t>(_ptr);
	const uintptr_t aligned = alignUp(addr, _align);
	return bitCast<Ty*>(aligned);
}
// Pointer-to-const overload: converts `_ptr` to its integer address, runs it
// through the integer alignUp with `_align`, and converts the result back to
// const Ty*.
template<typename Ty>
inline BX_CONSTEXPR_FUNC const Ty* alignUp(const Ty* _ptr, int32_t _align)
{
	const uintptr_t addr    = bitCast<uintptr_t>(_ptr);
	const uintptr_t aligned = alignUp(addr, _align);
	return bitCast<const Ty*>(aligned);
}
inline BX_CONST_FUNC uint16_t halfFromFloat(float _a)
{
union { uint32_t ui; float flt; } ftou;
ftou.flt = _a;
const uint32_t a_as_ui = bitCast<uint32_t>(_a);
const uint32_t one = uint32_li(0x00000001);
const uint32_t f_s_mask = uint32_li(kFloatSignMask);
@@ -728,13 +727,13 @@ namespace bx
const uint32_t f_h_m_pos_offset = uint32_li(0x0000000d);
const uint32_t h_nan_min = uint32_li(0x00007c01);
const uint32_t f_h_e_biased_flag = uint32_li(0x0000008f);
const uint32_t f_s = uint32_and(ftou.ui, f_s_mask);
const uint32_t f_e = uint32_and(ftou.ui, f_e_mask);
const uint32_t f_s = uint32_and(a_as_ui, f_s_mask);
const uint32_t f_e = uint32_and(a_as_ui, f_e_mask);
const uint16_t h_s = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset);
const uint32_t f_m = uint32_and(ftou.ui, f_m_mask);
const uint32_t f_m = uint32_and(a_as_ui, f_m_mask);
const uint16_t f_e_amount = (uint16_t)uint32_srl(f_e, f_e_pos);
const uint32_t f_e_half_bias = uint32_sub(f_e_amount, f_h_bias_offset);
const uint32_t f_snan = uint32_and(ftou.ui, f_snan_mask);
const uint32_t f_snan = uint32_and(a_as_ui, f_snan_mask);
const uint32_t f_m_round_mask = uint32_and(f_m, f_m_round_bit);
const uint32_t f_m_round_offset = uint32_sll(f_m_round_mask, one);
const uint32_t f_m_rounded = uint32_add(f_m, f_m_round_offset);
@@ -770,7 +769,7 @@ namespace bx
const uint32_t h_em_snan_result = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
const uint32_t h_result = uint32_or(h_s, h_em_snan_result);
return (uint16_t)(h_result);
return uint16_t(h_result);
}
inline BX_CONST_FUNC float halfToFloat(uint16_t _a)
@@ -817,9 +816,7 @@ namespace bx
const uint32_t f_nan_result = uint32_sels(is_nan_msb, f_em_nan, f_inf_result);
const uint32_t f_result = uint32_or(f_s, f_nan_result);
union { uint32_t ui; float flt; } utof;
utof.ui = f_result;
return utof.flt;
return bitCast<float>(f_result);
}
} // namespace bx