diff --git a/src/image.cpp b/src/image.cpp index f232ab1..29a7236 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -361,30 +361,30 @@ namespace bimg const uint8_t* rgba = src; for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba += 8, dst += 4) { - float rr = bx::pow(rgba[ 0], 2.2f); - float gg = bx::pow(rgba[ 1], 2.2f); - float bb = bx::pow(rgba[ 2], 2.2f); - float aa = rgba[ 3]; - rr += bx::pow(rgba[ 4], 2.2f); - gg += bx::pow(rgba[ 5], 2.2f); - bb += bx::pow(rgba[ 6], 2.2f); - aa += rgba[ 7]; - rr += bx::pow(rgba[_srcPitch+0], 2.2f); - gg += bx::pow(rgba[_srcPitch+1], 2.2f); - bb += bx::pow(rgba[_srcPitch+2], 2.2f); - aa += rgba[_srcPitch+3]; - rr += bx::pow(rgba[_srcPitch+4], 2.2f); - gg += bx::pow(rgba[_srcPitch+5], 2.2f); - bb += bx::pow(rgba[_srcPitch+6], 2.2f); - aa += rgba[_srcPitch+7]; + float rr = bx::toLinear(rgba[ 0]); + float gg = bx::toLinear(rgba[ 1]); + float bb = bx::toLinear(rgba[ 2]); + float aa = rgba[ 3]; + rr += bx::toLinear(rgba[ 4]); + gg += bx::toLinear(rgba[ 5]); + bb += bx::toLinear(rgba[ 6]); + aa += rgba[ 7]; + rr += bx::toLinear(rgba[_srcPitch+0]); + gg += bx::toLinear(rgba[_srcPitch+1]); + bb += bx::toLinear(rgba[_srcPitch+2]); + aa += rgba[_srcPitch+3]; + rr += bx::toLinear(rgba[_srcPitch+4]); + gg += bx::toLinear(rgba[_srcPitch+5]); + bb += bx::toLinear(rgba[_srcPitch+6]); + aa += rgba[_srcPitch+7]; rr *= 0.25f; gg *= 0.25f; bb *= 0.25f; aa *= 0.25f; - rr = bx::pow(rr, 1.0f/2.2f); - gg = bx::pow(gg, 1.0f/2.2f); - bb = bx::pow(bb, 1.0f/2.2f); + rr = bx::toGamma(rr); + gg = bx::toGamma(gg); + bb = bx::toGamma(bb); dst[0] = (uint8_t)rr; dst[1] = (uint8_t)gg; dst[2] = (uint8_t)bb; @@ -394,6 +394,43 @@ namespace bimg } } + BX_SIMD_INLINE bx::simd128_t simd_to_linear(bx::simd128_t _a) + { + using namespace bx; + const simd128_t f12_92 = simd_ld(12.92f, 12.92f, 12.92f, 1.0f); + const simd128_t f0_055 = simd_ld(0.055f, 0.055f, 0.055f, 0.0f); + const simd128_t f1_055 = simd_ld(1.055f, 1.055f, 1.055f, 1.0f); + const simd128_t f2_4 = simd_ld(2.4f, 2.4f, 2.4f, 1.0f); + const simd128_t f0_04045 = simd_ld(0.04045f, 0.04045f, 0.04045f, 0.0f); + const simd128_t lo = simd_div(_a, f12_92); + const simd128_t tmp0 = simd_add(_a, f0_055); + const simd128_t tmp1 = simd_div(tmp0, f1_055); + const simd128_t hi = simd_pow(tmp1, f2_4); + const simd128_t mask = simd_cmple(_a, f0_04045); + const simd128_t result = simd_selb(mask, hi, lo); + + return result; + } + + BX_SIMD_INLINE bx::simd128_t simd_to_gamma(bx::simd128_t _a) + { + using namespace bx; + const simd128_t f12_92 = simd_ld(12.92f, 12.92f, 12.92f, 1.0f); + const simd128_t f0_055 = simd_ld(0.055f, 0.055f, 0.055f, 0.0f); + const simd128_t f1_055 = simd_ld(1.055f, 1.055f, 1.055f, 1.0f); + const simd128_t f1o2_4 = simd_ld(1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.0f); + const simd128_t f0_0031308 = simd_ld(0.0031308f, 0.0031308f, 0.0031308f, 0.0f); + const simd128_t lo = simd_mul(_a, f12_92); + const simd128_t absa = simd_abs(_a); + const simd128_t tmp0 = simd_pow(absa, f1o2_4); + const simd128_t tmp1 = simd_mul(tmp0, f1_055); + const simd128_t hi = simd_sub(tmp1, f0_055); + const simd128_t mask = simd_cmple(_a, f0_0031308); + const simd128_t result = simd_selb(mask, hi, lo); + + return result; + } + void imageRgba8Downsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, uint32_t _dstPitch, const void* _src) { const uint32_t dstWidth = _width/2; @@ -414,8 +451,6 @@ namespace bimg const simd128_t pmask = simd_ild(0xff, 0x7f80, 0xff0000, 0x7f800000); const simd128_t wflip = simd_ild(0, 0, 0, 0x80000000); const simd128_t wadd = simd_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f); - const simd128_t gamma = simd_ld(1.0f/2.2f, 1.0f/2.2f, 1.0f/2.2f, 1.0f); - const simd128_t linear = simd_ld(2.2f, 2.2f, 2.2f, 1.0f); const simd128_t quater = simd_splat(0.25f); for (uint32_t zz = 0; zz < _depth; ++zz) @@ -452,16 +487,16 @@ namespace bimg const simd128_t abgr2n = simd_mul(abgr2c, unpack); const simd128_t abgr3n = simd_mul(abgr3c, unpack); - const simd128_t abgr0l = simd_pow(abgr0n, linear); - const simd128_t abgr1l = simd_pow(abgr1n, linear); - const simd128_t abgr2l = simd_pow(abgr2n, linear); - const simd128_t abgr3l = simd_pow(abgr3n, linear); + const simd128_t abgr0l = simd_to_linear(abgr0n); + const simd128_t abgr1l = simd_to_linear(abgr1n); + const simd128_t abgr2l = simd_to_linear(abgr2n); + const simd128_t abgr3l = simd_to_linear(abgr3n); const simd128_t sum0 = simd_add(abgr0l, abgr1l); const simd128_t sum1 = simd_add(abgr2l, abgr3l); const simd128_t sum2 = simd_add(sum0, sum1); const simd128_t avg0 = simd_mul(sum2, quater); - const simd128_t avg1 = simd_pow(avg0, gamma); + const simd128_t avg1 = simd_to_gamma(avg0); const simd128_t avg2 = simd_mul(avg1, pack); const simd128_t ftoi0 = simd_ftoi(avg2); @@ -493,10 +528,10 @@ namespace bimg float* fd = ( float*)(dst + offset); const float* fs = (const float*)(src + offset); - fd[0] = bx::pow(fs[0], 1.0f/2.2f); - fd[1] = bx::pow(fs[1], 1.0f/2.2f); - fd[2] = bx::pow(fs[2], 1.0f/2.2f); - fd[3] = fs[3]; + fd[0] = bx::toLinear(fs[0]); + fd[1] = bx::toLinear(fs[1]); + fd[2] = bx::toLinear(fs[2]); + fd[3] = fs[3]; } } } @@ -517,10 +552,10 @@ namespace bimg float* fd = ( float*)(dst + offset); const float* fs = (const float*)(src + offset); - fd[0] = bx::pow(fs[0], 2.2f); - fd[1] = bx::pow(fs[1], 2.2f); - fd[2] = bx::pow(fs[2], 2.2f); - fd[3] = fs[3]; + fd[0] = bx::toGamma(fs[0]); + fd[1] = bx::toGamma(fs[1]); + fd[2] = bx::toGamma(fs[2]); + fd[3] = fs[3]; } } } @@ -530,6 +565,7 @@ namespace bimg { const uint32_t dstWidth = _width/2; const uint32_t dstHeight = _height/2; + const uint32_t dstDepth = _depth/2; if (0 == dstWidth || 0 == dstHeight) @@ -540,7 +576,7 @@ namespace bimg const uint8_t* src = (const uint8_t*)_src; uint8_t* dst = (uint8_t*)_dst; - for (uint32_t zz = 0; zz < _depth; ++zz) + if (0 == dstDepth) { for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep) { @@ -570,15 +606,84 @@ namespace bimg xyz[2] += rgba1[6]; xyz[3] += rgba1[7]; - xyz[0] *= 0.25f; - xyz[1] *= 0.25f; - xyz[2] *= 0.25f; - xyz[3] *= 0.25f; + xyz[0] *= 1.0f/4.0f; + xyz[1] *= 1.0f/4.0f; + xyz[2] *= 1.0f/4.0f; + xyz[3] *= 1.0f/4.0f; bx::packRgba32F(dst, xyz); } } } + else + { + const uint32_t slicePitch = _srcPitch*_height; + + for (uint32_t zz = 0; zz < dstDepth; ++zz, src += slicePitch) + { + for (uint32_t yy = 0, ystep = _srcPitch*2; yy < dstHeight; ++yy, src += ystep) + { + const float* rgba0 = (const float*)&src[0]; + const float* rgba1 = (const float*)&src[_srcPitch]; + const float* rgba2 = (const float*)&src[slicePitch]; + const float* rgba3 = (const float*)&src[slicePitch+_srcPitch]; + for (uint32_t xx = 0 + ; xx < dstWidth + ; ++xx, rgba0 += 8, rgba1 += 8, rgba2 += 8, rgba3 += 8, dst += 16 + ) + { + float xyz[4]; + + xyz[0] = rgba0[0]; + xyz[1] = rgba0[1]; + xyz[2] = rgba0[2]; + xyz[3] = rgba0[3]; + + xyz[0] += rgba0[4]; + xyz[1] += rgba0[5]; + xyz[2] += rgba0[6]; + xyz[3] += rgba0[7]; + + xyz[0] += rgba1[0]; + xyz[1] += rgba1[1]; + xyz[2] += rgba1[2]; + xyz[3] += rgba1[3]; + + xyz[0] += rgba1[4]; + xyz[1] += rgba1[5]; + xyz[2] += rgba1[6]; + xyz[3] += rgba1[7]; + + xyz[0] += rgba2[0]; + xyz[1] += rgba2[1]; + xyz[2] += rgba2[2]; + xyz[3] += rgba2[3]; + + xyz[0] += rgba2[4]; + xyz[1] += rgba2[5]; + xyz[2] += rgba2[6]; + xyz[3] += rgba2[7]; + + xyz[0] += rgba3[0]; + xyz[1] += rgba3[1]; + xyz[2] += rgba3[2]; + xyz[3] += rgba3[3]; + + xyz[0] += rgba3[4]; + xyz[1] += rgba3[5]; + xyz[2] += rgba3[6]; + xyz[3] += rgba3[7]; + + xyz[0] *= 1.0f/8.0f; + xyz[1] *= 1.0f/8.0f; + xyz[2] *= 1.0f/8.0f; + xyz[3] *= 1.0f/8.0f; + + bx::packRgba32F(dst, xyz); + } + } + } + } } void imageRgba32fLinearDownsample2x2(void* _dst, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch, const void* _src) @@ -3086,10 +3191,10 @@ namespace bimg const uint8_t* rgba = src; for (uint32_t xx = 0; xx < dstWidth; ++xx, rgba += 4, dst += 4) { - dst[0] = bx::pow(rgba[0], 2.2f); - dst[1] = bx::pow(rgba[1], 2.2f); - dst[2] = bx::pow(rgba[2], 2.2f); - dst[3] = rgba[3]; + dst[0] = bx::toLinear(rgba[0]); + dst[1] = bx::toLinear(rgba[1]); + dst[2] = bx::toLinear(rgba[2]); + dst[3] = rgba[3]; } } } diff --git a/tools/texturec/texturec.cpp b/tools/texturec/texturec.cpp index 2f3a2e8..357cb13 100644 --- a/tools/texturec/texturec.cpp +++ b/tools/texturec/texturec.cpp @@ -26,7 +26,7 @@ #include #define BIMG_TEXTUREC_VERSION_MAJOR 1 -#define BIMG_TEXTUREC_VERSION_MINOR 13 +#define BIMG_TEXTUREC_VERSION_MINOR 14 struct Options {