diff --git a/include/bx/uint32_t.h b/include/bx/uint32_t.h index 3817c7a..179ba4e 100644 --- a/include/bx/uint32_t.h +++ b/include/bx/uint32_t.h @@ -449,6 +449,123 @@ namespace bx return result; } + + /* halfFromFloat: reinterprets the bits of _a as a uint32_t through a union, runs them through a straight-line sequence of uint32_* helper calls (no branches in this function) and returns the result truncated to uint16_t. The f_* constants match the IEEE-754 binary32 field layout and the h_* constants the binary16 layout; assumes float is binary32 - TODO confirm for all targets. The uint32_* helpers are defined outside this hunk, so the notes below assume each performs the operation its name suggests - verify against their definitions. NOTE(review): reading ftou.ui after writing ftou.flt relies on union type punning, which ISO C++ does not guarantee - confirm for supported compilers. */ inline uint16_t halfFromFloat(float _a) + { + union { uint32_t ui; float flt; } ftou; + ftou.flt = _a; + + /* Masks, bit positions and bias offsets used by the computation below. */ const uint32_t one = uint32_li(0x00000001); + const uint32_t f_s_mask = uint32_li(0x80000000); + const uint32_t f_e_mask = uint32_li(0x7f800000); + const uint32_t f_m_mask = uint32_li(0x007fffff); + const uint32_t f_m_hidden_bit = uint32_li(0x00800000); + const uint32_t f_m_round_bit = uint32_li(0x00001000); + const uint32_t f_snan_mask = uint32_li(0x7fc00000); + const uint32_t f_e_pos = uint32_li(0x00000017); + const uint32_t h_e_pos = uint32_li(0x0000000a); + const uint32_t h_e_mask = uint32_li(0x00007c00); + const uint32_t h_snan_mask = uint32_li(0x00007e00); + const uint32_t h_e_mask_value = uint32_li(0x0000001f); + const uint32_t f_h_s_pos_offset = uint32_li(0x00000010); + const uint32_t f_h_bias_offset = uint32_li(0x00000070); + const uint32_t f_h_m_pos_offset = uint32_li(0x0000000d); + const uint32_t h_nan_min = uint32_li(0x00007c01); + const uint32_t f_h_e_biased_flag = uint32_li(0x0000008f); + const uint32_t f_s = uint32_and(ftou.ui, f_s_mask); + const uint32_t f_e = uint32_and(ftou.ui, f_e_mask); + /* NOTE(review): h_s and f_e_amount are declared uint16_t but initialised from uint32_srl; if that helper returns uint32_t this is an implicit narrowing - confirm. */ const uint16_t h_s = uint32_srl(f_s, f_h_s_pos_offset); + const uint32_t f_m = uint32_and(ftou.ui, f_m_mask); + const uint16_t f_e_amount = uint32_srl(f_e, f_e_pos); + const uint32_t f_e_half_bias = uint32_sub(f_e_amount, f_h_bias_offset); + const uint32_t f_snan = uint32_and(ftou.ui, f_snan_mask); + /* Rounding: 0x00001000 is bit 12, the highest of the 13 mantissa bits dropped by the later shift by f_h_m_pos_offset (0x0d); when it is set, its value shifted by one (presumably 0x2000, the lowest kept bit) is added to the mantissa. */ const uint32_t f_m_round_mask = uint32_and(f_m, f_m_round_bit); + const uint32_t f_m_round_offset = uint32_sll(f_m_round_mask, one); + const uint32_t f_m_rounded = uint32_add(f_m, f_m_round_offset); + const uint32_t f_m_denorm_sa = uint32_sub(one, f_e_half_bias); + const uint32_t f_m_with_hidden = uint32_or(f_m_rounded, f_m_hidden_bit); + const uint32_t 
f_m_denorm = uint32_srl(f_m_with_hidden, f_m_denorm_sa); + const uint32_t h_m_denorm = uint32_srl(f_m_denorm, f_h_m_pos_offset); + const uint32_t f_m_rounded_overflow = uint32_and(f_m_rounded, f_m_hidden_bit); + const uint32_t m_nan = uint32_srl(f_m, f_h_m_pos_offset); + const uint32_t h_em_nan = uint32_or(h_e_mask, m_nan); + const uint32_t h_e_norm_overflow_offset = uint32_inc(f_e_half_bias); + const uint32_t h_e_norm_overflow = uint32_sll(h_e_norm_overflow_offset, h_e_pos); + const uint32_t h_e_norm = uint32_sll(f_e_half_bias, h_e_pos); + const uint32_t h_m_norm = uint32_srl(f_m_rounded, f_h_m_pos_offset); + const uint32_t h_em_norm = uint32_or(h_e_norm, h_m_norm); + /* The is_*_msb values feed, directly or via uint32_and / uint32_or / uint32_not, the first argument of the uint32_sels calls below; presumably only their most significant bit is tested - confirm against uint32_sels. */ const uint32_t is_h_ndenorm_msb = uint32_sub(f_h_bias_offset, f_e_amount); + const uint32_t is_f_e_flagged_msb = uint32_sub(f_h_e_biased_flag, f_e_half_bias); + const uint32_t is_h_denorm_msb = uint32_not(is_h_ndenorm_msb); + const uint32_t is_f_m_eqz_msb = uint32_dec(f_m); + const uint32_t is_h_nan_eqz_msb = uint32_dec(m_nan); + const uint32_t is_f_inf_msb = uint32_and(is_f_e_flagged_msb, is_f_m_eqz_msb); + const uint32_t is_f_nan_underflow_msb = uint32_and(is_f_e_flagged_msb, is_h_nan_eqz_msb); + const uint32_t is_e_overflow_msb = uint32_sub(h_e_mask_value, f_e_half_bias); + const uint32_t is_h_inf_msb = uint32_or(is_e_overflow_msb, is_f_inf_msb); + const uint32_t is_f_nsnan_msb = uint32_sub(f_snan, f_snan_mask); + const uint32_t is_m_norm_overflow_msb = uint32_neg(f_m_rounded_overflow); + const uint32_t is_f_snan_msb = uint32_not(is_f_nsnan_msb); + /* Selection chain: each uint32_sels result is passed as the third argument of the next call, so (presumably) a later selector overrides the earlier ones whenever its own condition holds. */ const uint32_t h_em_overflow_result = uint32_sels(is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm); + const uint32_t h_em_nan_result = uint32_sels(is_f_e_flagged_msb, h_em_nan, h_em_overflow_result); + const uint32_t h_em_nan_underflow_result = uint32_sels(is_f_nan_underflow_msb, h_nan_min, h_em_nan_result); + const uint32_t h_em_inf_result = uint32_sels(is_h_inf_msb, h_e_mask, h_em_nan_underflow_result); + const uint32_t h_em_denorm_result = 
uint32_sels(is_h_denorm_msb, h_m_denorm, h_em_inf_result); + const uint32_t h_em_snan_result = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result); + /* The sign bit, already moved down by f_h_s_pos_offset (16), is merged with the selected exponent/mantissa bits. */ const uint32_t h_result = uint32_or(h_s, h_em_snan_result); + + return (uint16_t)(h_result); + } + + /* halfToFloat: takes a 16-bit value _a, separates it with the masks 0x7c00, 0x03ff and 0x8000 (the exponent, mantissa and sign fields of the IEEE-754 binary16 layout), assembles a 32-bit pattern through a straight-line sequence of uint32_* helper calls (no branches in this function) and returns that pattern reinterpreted as float through a union. Assumes float is IEEE-754 binary32 - TODO confirm. The uint32_* helpers are defined outside this hunk, so the notes below assume each performs the operation its name suggests - verify against their definitions. */ inline float halfToFloat(uint16_t _a) + { + const uint32_t h_e_mask = uint32_li(0x00007c00); + const uint32_t h_m_mask = uint32_li(0x000003ff); + const uint32_t h_s_mask = uint32_li(0x00008000); + const uint32_t h_f_s_pos_offset = uint32_li(0x00000010); + const uint32_t h_f_e_pos_offset = uint32_li(0x0000000d); + const uint32_t h_f_bias_offset = uint32_li(0x0001c000); + const uint32_t f_e_mask = uint32_li(0x7f800000); + const uint32_t f_m_mask = uint32_li(0x007fffff); + const uint32_t h_f_e_denorm_bias = uint32_li(0x0000007e); + const uint32_t h_f_m_denorm_sa_bias = uint32_li(0x00000008); + const uint32_t f_e_pos = uint32_li(0x00000017); + const uint32_t h_e_mask_minus_one = uint32_li(0x00007bff); + /* Split the input into its three bit fields. */ const uint32_t h_e = uint32_and(_a, h_e_mask); + const uint32_t h_m = uint32_and(_a, h_m_mask); + const uint32_t h_s = uint32_and(_a, h_s_mask); + const uint32_t h_e_f_bias = uint32_add(h_e, h_f_bias_offset); + /* h_m_nlz is used only to derive h_f_m_sa, the shift amount and exponent adjustment of the *_denorm values; presumably uint32_cntlz counts leading zero bits - confirm. */ const uint32_t h_m_nlz = uint32_cntlz(h_m); + const uint32_t f_s = uint32_sll(h_s, h_f_s_pos_offset); + const uint32_t f_e = uint32_sll(h_e_f_bias, h_f_e_pos_offset); + const uint32_t f_m = uint32_sll(h_m, h_f_e_pos_offset); + const uint32_t f_em = uint32_or(f_e, f_m); + const uint32_t h_f_m_sa = uint32_sub(h_m_nlz, h_f_m_denorm_sa_bias); + const uint32_t f_e_denorm_unpacked = uint32_sub(h_f_e_denorm_bias, h_f_m_sa); + const uint32_t h_f_m = uint32_sll(h_m, h_f_m_sa); + const uint32_t f_m_denorm = uint32_and(h_f_m, f_m_mask); + const uint32_t f_e_denorm = uint32_sll(f_e_denorm_unpacked, f_e_pos); + const uint32_t f_em_denorm = uint32_or(f_e_denorm, f_m_denorm); + const uint32_t f_em_nan = uint32_or(f_e_mask, f_m); + /* Classification values for the zero / denormal / infinity / NaN cases; presumably only their most significant bit is meaningful - confirm against uint32_sels and uint32_ext. */ const uint32_t is_e_eqz_msb = uint32_dec(h_e); + const uint32_t is_m_nez_msb = 
uint32_neg(h_m); + const uint32_t is_e_flagged_msb = uint32_sub(h_e_mask_minus_one, h_e); + const uint32_t is_zero_msb = uint32_andc(is_e_eqz_msb, is_m_nez_msb); + const uint32_t is_inf_msb = uint32_andc(is_e_flagged_msb, is_m_nez_msb); + const uint32_t is_denorm_msb = uint32_and(is_m_nez_msb, is_e_eqz_msb); + const uint32_t is_nan_msb = uint32_and(is_e_flagged_msb, is_m_nez_msb); + /* Unlike the other cases, the zero case is applied as a mask (uint32_ext followed by uint32_andc) rather than through uint32_sels. */ const uint32_t is_zero = uint32_ext(is_zero_msb); + const uint32_t f_zero_result = uint32_andc(f_em, is_zero); + /* Selection chain: each result is the third argument of the next uint32_sels call, so (presumably) a later selector overrides the earlier ones whenever its own condition holds. */ const uint32_t f_denorm_result = uint32_sels(is_denorm_msb, f_em_denorm, f_zero_result); + const uint32_t f_inf_result = uint32_sels(is_inf_msb, f_e_mask, f_denorm_result); + const uint32_t f_nan_result = uint32_sels(is_nan_msb, f_em_nan, f_inf_result); + const uint32_t f_result = uint32_or(f_s, f_nan_result); + + /* Reinterpret the assembled 32-bit pattern as a float through the union. NOTE(review): same union type-punning caveat as in halfFromFloat - confirm for supported compilers. */ union { uint32_t ui; float flt; } utof; + utof.ui = f_result; + return utof.flt; + } + } // namespace bx #endif // __BX_UINT32_T_H__