Fixed Rgb9E5F decoding. Removed union cast in favor of bx::bitCast.

2026-02-17 20:52:37 +01:00 · 2024-11-19 22:23:44 -08:00
parent 98b36f44ca
commit 04464c4188
5 changed files with 91 additions and 76 deletions
--- a/include/bx/inline/allocator.inl
+++ b/include/bx/inline/allocator.inl
@@ -24,12 +24,11 @@ namespace bx

 	inline void* alignPtr(void* _ptr, size_t _extra, size_t _align)
 	{
-		union { void* ptr; uintptr_t addr; } un;
-		un.ptr = _ptr;
-		uintptr_t unaligned = un.addr + _extra; // space for header
-		uintptr_t aligned = bx::alignUp(unaligned, int32_t(_align) );
-		un.addr = aligned;
-		return un.ptr;
+		const uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		const uintptr_t unaligned = addr + _extra; // space for header
+		const uintptr_t aligned = bx::alignUp(unaligned, int32_t(_align) );
+
+		return bitCast<void*>(aligned);
 	}

 	inline void* alloc(AllocatorI* _allocator, size_t _size, size_t _align, const Location& _location)
--- a/include/bx/inline/pixelformat.inl
+++ b/include/bx/inline/pixelformat.inl
@@ -21,9 +21,7 @@ namespace bx

 	inline int32_t toSnorm(float _value, float _scale)
 	{
-		return int32_t(round(
-					clamp(_value, -1.0f, 1.0f) * _scale)
-					);
+		return int32_t(round(clamp(_value, -1.0f, 1.0f) * _scale) );
 	}

 	inline float fromSnorm(int32_t _value, float _scale)
@@ -721,46 +719,49 @@ namespace bx
 		memCopy(_dst, _src, 8);
 	}

-	template<int32_t MantissaBits, int32_t ExpBits>
+	template<int32_t MantissaBitsT, int32_t ExpBitsT>
 	inline void encodeRgbE(float* _dst, const float* _src)
 	{
 		// Reference(s):
 		// - https://web.archive.org/web/20181126040035/https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
 		//
-		const int32_t expMax  = (1<<ExpBits) - 1;
-		const int32_t expBias = (1<<(ExpBits - 1) ) - 1;
+		const int32_t expMax  = (1<< ExpBitsT      ) - 1;
+		const int32_t expBias = (1<<(ExpBitsT - 1) ) - 1;
 		const float   sharedExpMax = float(expMax) / float(expMax + 1) * float(1 << (expMax - expBias) );

 		const float rr = clamp(_src[0], 0.0f, sharedExpMax);
 		const float gg = clamp(_src[1], 0.0f, sharedExpMax);
 		const float bb = clamp(_src[2], 0.0f, sharedExpMax);
 		const float mm = max(rr, gg, bb);
-		union { float ff; uint32_t ui; } cast = { mm };
-		int32_t expShared = int32_t(uint32_imax(uint32_t(-expBias-1), ( ( (cast.ui>>23) & 0xff) - 127) ) ) + 1 + expBias;
-		float denom = pow(2.0f, float(expShared - expBias - MantissaBits) );
+		const uint32_t mm_as_ui = bitCast<uint32_t>(mm); // raw IEEE-754 bits of the largest channel

-		if ( (1<<MantissaBits) == int32_t(round(mm/denom) ) )
+		int32_t expShared = max(-expBias-1, int32_t( (mm_as_ui>>23) & 0xff) - 127) + 1 + expBias; // signed compare: an unsigned max would always pick the (-expBias-1) floor for exponents >= 0
+		float denom = pow(2.0f, float(expShared - expBias - MantissaBitsT) );
+
+		if ( (1<<MantissaBitsT) == int32_t(round(mm/denom) ) ) // mantissa rounded up to 2^MantissaBitsT: bump the shared exponent
 		{
 			denom *= 2.0f;
 			++expShared;
 		}

-		const float invDenom = 1.0f/denom;
+		const float invDenom = rcpSafe(denom); // channels are scaled by 1/denom; decodeRgbE must multiply by denom
 		_dst[0] = round(rr * invDenom);
 		_dst[1] = round(gg * invDenom);
 		_dst[2] = round(bb * invDenom);
 		_dst[3] = float(expShared);
 	}

-	template<int32_t MantissaBits, int32_t ExpBits>
+	template<int32_t MantissaBitsT, int32_t ExpBitsT>
 	inline void decodeRgbE(float* _dst, const float* _src)
 	{
-		const int32_t expBias = (1<<(ExpBits - 1) ) - 1;
-		const float exponent  = _src[3]-float(expBias-MantissaBits);
+		const int32_t expBias = (1<<(ExpBitsT - 1) ) - 1;
+		const float exponent  = float(expBias+MantissaBitsT)-_src[3]; // exponent of the encoder's multiplier 2^-(E - bias - mantissaBits); _src[3] is the shared exponent E
 		const float scale     = pow(2.0f, exponent);
-		_dst[0] = _src[0] * scale;
-		_dst[1] = _src[1] * scale;
-		_dst[2] = _src[2] * scale;
+		const float invScale  = rcpSafe(scale); // 2^(E - bias - mantissaBits): undoes the encoder's scaling
+
+		_dst[0] = _src[0] * invScale; // _src[0..2] are the raw integer mantissas
+		_dst[1] = _src[1] * invScale;
+		_dst[2] = _src[2] * invScale;
 	}

 	// RGB9E5F
@@ -779,12 +780,12 @@ namespace bx

 	inline void unpackRgb9E5F(float* _dst, const void* _src)
 	{
-		uint32_t packed = *( (const uint32_t*)_src);
+		const uint32_t packed = *( (const uint32_t*)_src);

 		float tmp[4];
-		tmp[0] = float( ( (packed    ) & 0x1ff) ) / 511.0f;
-		tmp[1] = float( ( (packed>> 9) & 0x1ff) ) / 511.0f;
-		tmp[2] = float( ( (packed>>18) & 0x1ff) ) / 511.0f;
+		tmp[0] = float( ( (packed    ) & 0x1ff) );
+		tmp[1] = float( ( (packed>> 9) & 0x1ff) );
+		tmp[2] = float( ( (packed>>18) & 0x1ff) );
 		tmp[3] = float( ( (packed>>27) &  0x1f) );

 		decodeRgbE<9, 5>(_dst, tmp);
--- a/include/bx/inline/uint32_t.inl
+++ b/include/bx/inline/uint32_t.inl
@@ -649,15 +649,15 @@ namespace bx
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC bool isAligned(Ty* _ptr, int32_t _align)
 	{
-		union { const void* ptr; uintptr_t addr; } un = { _ptr };
-		return isAligned(un.addr, _align);
+		const uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		return isAligned(addr, _align);
 	}

 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC bool isAligned(const Ty* _ptr, int32_t _align)
 	{
-		union { const void* ptr; uintptr_t addr; } un = { _ptr };
-		return isAligned(un.addr, _align);
+		const uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		return isAligned(addr, _align);
 	}

 	template<typename Ty>
@@ -670,17 +670,17 @@ namespace bx
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC Ty* alignDown(Ty* _ptr, int32_t _align)
 	{
-		union { Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignDown(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignDown(addr, _align);
+		return bitCast<Ty*>(addr);
 	}

 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC const Ty* alignDown(const Ty* _ptr, int32_t _align)
 	{
-		union { const Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignDown(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignDown(addr, _align);
+		return bitCast<const Ty*>(addr);
 	}

 	template<typename Ty>
@@ -693,23 +693,22 @@ namespace bx
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC Ty* alignUp(Ty* _ptr, int32_t _align)
 	{
-		union { Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignUp(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignUp(addr, _align);
+		return bitCast<Ty*>(addr);
 	}

 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC const Ty* alignUp(const Ty* _ptr, int32_t _align)
 	{
-		union { const Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignUp(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignUp(addr, _align);
+		return bitCast<const Ty*>(addr);
 	}

 	inline BX_CONST_FUNC uint16_t halfFromFloat(float _a)
 	{
-		union { uint32_t ui; float flt; } ftou;
-		ftou.flt = _a;
+		const uint32_t a_as_ui = bitCast<uint32_t>(_a);

 		const uint32_t one                       = uint32_li(0x00000001);
 		const uint32_t f_s_mask                  = uint32_li(kFloatSignMask);
@@ -728,13 +727,13 @@ namespace bx
 		const uint32_t f_h_m_pos_offset          = uint32_li(0x0000000d);
 		const uint32_t h_nan_min                 = uint32_li(0x00007c01);
 		const uint32_t f_h_e_biased_flag         = uint32_li(0x0000008f);
-		const uint32_t f_s                       = uint32_and(ftou.ui, f_s_mask);
-		const uint32_t f_e                       = uint32_and(ftou.ui, f_e_mask);
+		const uint32_t f_s                       = uint32_and(a_as_ui, f_s_mask);
+		const uint32_t f_e                       = uint32_and(a_as_ui, f_e_mask);
 		const uint16_t h_s                       = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset);
-		const uint32_t f_m                       = uint32_and(ftou.ui, f_m_mask);
+		const uint32_t f_m                       = uint32_and(a_as_ui, f_m_mask);
 		const uint16_t f_e_amount                = (uint16_t)uint32_srl(f_e, f_e_pos);
 		const uint32_t f_e_half_bias             = uint32_sub(f_e_amount, f_h_bias_offset);
-		const uint32_t f_snan                    = uint32_and(ftou.ui, f_snan_mask);
+		const uint32_t f_snan                    = uint32_and(a_as_ui, f_snan_mask);
 		const uint32_t f_m_round_mask            = uint32_and(f_m, f_m_round_bit);
 		const uint32_t f_m_round_offset          = uint32_sll(f_m_round_mask, one);
 		const uint32_t f_m_rounded               = uint32_add(f_m, f_m_round_offset);
@@ -770,7 +769,7 @@ namespace bx
 		const uint32_t h_em_snan_result          = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
 		const uint32_t h_result                  = uint32_or(h_s, h_em_snan_result);

-		return (uint16_t)(h_result);
+		return uint16_t(h_result);
 	}

 	inline BX_CONST_FUNC float halfToFloat(uint16_t _a)
@@ -817,9 +816,7 @@ namespace bx
 		const uint32_t f_nan_result         = uint32_sels(is_nan_msb, f_em_nan, f_inf_result);
 		const uint32_t f_result             = uint32_or(f_s, f_nan_result);

-		union { uint32_t ui; float flt; } utof;
-		utof.ui = f_result;
-		return utof.flt;
+		return bitCast<float>(f_result);
 	}

 } // namespace bx