diff --git a/3rdparty/tinyexr/tinyexr.h b/3rdparty/tinyexr/tinyexr.h index 75f8839..850cc4c 100644 --- a/3rdparty/tinyexr/tinyexr.h +++ b/3rdparty/tinyexr/tinyexr.h @@ -124,7 +124,8 @@ extern "C" { #define TINYEXR_PIXELTYPE_HALF (1) #define TINYEXR_PIXELTYPE_FLOAT (2) -#define TINYEXR_MAX_ATTRIBUTES (128) +#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024) +#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128) #define TINYEXR_COMPRESSIONTYPE_NONE (0) #define TINYEXR_COMPRESSIONTYPE_RLE (1) @@ -206,7 +207,8 @@ typedef struct _EXRHeader { // Custom attributes(exludes required attributes(e.g. `channels`, // `compression`, etc) int num_custom_attributes; - EXRAttribute custom_attributes[TINYEXR_MAX_ATTRIBUTES]; + EXRAttribute *custom_attributes; // array of EXRAttribute. size = + // `num_custom_attributes`. EXRChannelInfo *channels; // [num_channels] @@ -6939,6 +6941,14 @@ void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, static const int kEXRVersionSize = 8; +static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; +} + static void swap2(unsigned short *val) { #ifdef MINIZ_LITTLE_ENDIAN (void)val; @@ -6952,6 +6962,36 @@ static void swap2(unsigned short *val) { #endif } +static void cpy4(int *dst_val, const int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(float *dst_val, const float *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = 
reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + static void swap4(unsigned int *val) { #ifdef MINIZ_LITTLE_ENDIAN (void)val; @@ -6967,6 +7007,22 @@ static void swap4(unsigned int *val) { #endif } +#if 0 +static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + dst[4] = src[4]; + dst[5] = src[5]; + dst[6] = src[6]; + dst[7] = src[7]; +} +#endif + static void swap8(tinyexr::tinyexr_uint64 *val) { #ifdef MINIZ_LITTLE_ENDIAN (void)val; @@ -8215,8 +8271,8 @@ static void hufBuildEncTable( // for all array entries. // - int hlink[HUF_ENCSIZE]; - long long *fHeap[HUF_ENCSIZE]; + std::vector hlink(HUF_ENCSIZE); + std::vector fHeap(HUF_ENCSIZE); *im = 0; @@ -8275,8 +8331,8 @@ static void hufBuildEncTable( std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - long long scode[HUF_ENCSIZE]; - memset(scode, 0, sizeof(long long) * HUF_ENCSIZE); + std::vector scode(HUF_ENCSIZE); + memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE); while (nf > 1) { // @@ -8348,8 +8404,8 @@ static void hufBuildEncTable( // code table from scode into frq. 
// - hufCanonicalCodeTable(scode); - memcpy(frq, scode, sizeof(long long) * HUF_ENCSIZE); + hufCanonicalCodeTable(scode.data()); + memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE); } // @@ -8813,7 +8869,7 @@ static bool hufDecode(const long long *hcode, // i : encoding table return true; } -static void countFrequencies(long long freq[HUF_ENCSIZE], +static void countFrequencies(std::vector &freq, const unsigned short data[/*n*/], int n) { for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; @@ -8844,21 +8900,21 @@ static int hufCompress(const unsigned short raw[], int nRaw, char compressed[]) { if (nRaw == 0) return 0; - long long freq[HUF_ENCSIZE]; + std::vector freq(HUF_ENCSIZE); countFrequencies(freq, raw, nRaw); int im = 0; int iM = 0; - hufBuildEncTable(freq, &im, &iM); + hufBuildEncTable(freq.data(), &im, &iM); char *tableStart = compressed + 20; char *tableEnd = tableStart; - hufPackEncTable(freq, im, iM, &tableEnd); + hufPackEncTable(freq.data(), im, iM, &tableEnd); int tableLength = tableEnd - tableStart; char *dataStart = tableEnd; - int nBits = hufEncode(freq, raw, nRaw, iM, dataStart); + int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart); int data_length = (nBits + 7) / 8; writeUInt(compressed, im); @@ -9003,7 +9059,7 @@ static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, const unsigned char *inPtr, size_t inSize, const std::vector &channelInfo, int data_width, int num_lines) { - unsigned char bitmap[BITMAP_SIZE]; + std::vector bitmap(BITMAP_SIZE); unsigned short minNonZero; unsigned short maxNonZero; @@ -9054,12 +9110,12 @@ static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, } } - bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), bitmap, - minNonZero, maxNonZero); + bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), + bitmap.data(), minNonZero, maxNonZero); - unsigned short lut[USHORT_RANGE]; - unsigned short maxValue = forwardLutFromBitmap(bitmap, lut); - applyLut(lut, 
&tmpBuffer.at(0), static_cast(tmpBuffer.size())); + std::vector lut(USHORT_RANGE); + unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), lut.data()); + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBuffer.size())); // // Store range compression info in _outBuffer @@ -9129,7 +9185,7 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, return true; } - unsigned char bitmap[BITMAP_SIZE]; + std::vector bitmap(BITMAP_SIZE); unsigned short minNonZero; unsigned short maxNonZero; @@ -9139,11 +9195,13 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, return false; #endif - memset(bitmap, 0, BITMAP_SIZE); + memset(bitmap.data(), 0, BITMAP_SIZE); const unsigned char *ptr = inPtr; - minNonZero = *(reinterpret_cast(ptr)); - maxNonZero = *(reinterpret_cast(ptr + 2)); + //minNonZero = *(reinterpret_cast(ptr)); + tinyexr::cpy2(&minNonZero, reinterpret_cast(ptr)); + //maxNonZero = *(reinterpret_cast(ptr + 2)); + tinyexr::cpy2(&maxNonZero, reinterpret_cast(ptr + 2)); ptr += 4; if (maxNonZero >= BITMAP_SIZE) { @@ -9156,9 +9214,9 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, ptr += maxNonZero - minNonZero + 1; } - unsigned short lut[USHORT_RANGE]; - memset(lut, 0, sizeof(unsigned short) * USHORT_RANGE); - unsigned short maxValue = reverseLutFromBitmap(bitmap, lut); + std::vector lut(USHORT_RANGE); + memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE); + unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data()); // // Huffman decoding @@ -9166,7 +9224,8 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, int length; - length = *(reinterpret_cast(ptr)); + //length = *(reinterpret_cast(ptr)); + tinyexr::cpy4(&length, reinterpret_cast(ptr)); ptr += sizeof(int); std::vector tmpBuffer(tmpBufSize); @@ -9212,7 +9271,7 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, // Expand the pixel data to their 
original range // - applyLut(lut, &tmpBuffer.at(0), static_cast(tmpBufSize)); + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBufSize)); for (int y = 0; y < num_lines; y++) { for (size_t i = 0; i < channelData.size(); ++i) { @@ -9480,7 +9539,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, for (size_t u = 0; u < static_cast(width); u++) { FP16 hf; - hf.u = line_ptr[u]; + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); tinyexr::swap2(reinterpret_cast(&hf.u)); @@ -9523,7 +9583,9 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, &outBuf.at(v * pixel_data_size * static_cast(width) + channel_offset_list[c] * static_cast(width))); for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val = line_ptr[u]; + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(&val); @@ -9549,7 +9611,9 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, v * pixel_data_size * static_cast(x_stride) + channel_offset_list[c] * static_cast(x_stride))); for (size_t u = 0; u < static_cast(width); u++) { - float val = line_ptr[u]; + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(reinterpret_cast(&val)); @@ -9611,7 +9675,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, for (size_t u = 0; u < static_cast(width); u++) { tinyexr::FP16 hf; - hf.u = line_ptr[u]; + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); tinyexr::swap2(reinterpret_cast(&hf.u)); @@ -9654,7 +9719,9 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, &outBuf.at(v * pixel_data_size * static_cast(width) + channel_offset_list[c] * static_cast(width))); for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val = line_ptr[u]; + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(&val); @@ -9680,7 +9747,9 @@ static bool DecodePixelData(/* out */ unsigned char 
**out_images, &outBuf.at(v * pixel_data_size * static_cast(width) + channel_offset_list[c] * static_cast(width))); for (size_t u = 0; u < static_cast(width); u++) { - float val = line_ptr[u]; + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(reinterpret_cast(&val)); @@ -9735,7 +9804,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, for (size_t u = 0; u < static_cast(width); u++) { tinyexr::FP16 hf; - hf.u = line_ptr[u]; + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); tinyexr::swap2(reinterpret_cast(&hf.u)); @@ -9778,7 +9848,9 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, &outBuf.at(v * pixel_data_size * static_cast(width) + channel_offset_list[c] * static_cast(width))); for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val = line_ptr[u]; + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(&val); @@ -9804,7 +9876,9 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, &outBuf.at(v * pixel_data_size * static_cast(width) + channel_offset_list[c] * static_cast(width))); for (size_t u = 0; u < static_cast(width); u++) { - float val = line_ptr[u]; + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(reinterpret_cast(&val)); @@ -9867,7 +9941,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, &outBuf.at(v * pixel_data_size * static_cast(width) + channel_offset_list[c] * static_cast(width))); for (size_t u = 0; u < static_cast(width); u++) { - float val = line_ptr[u]; + float val; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(reinterpret_cast(&val)); @@ -9917,7 +9992,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, for (int u = 0; u < width; u++) { tinyexr::FP16 hf; - hf.u = line_ptr[u]; + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); tinyexr::swap2(reinterpret_cast(&hf.u)); @@ -9934,7 +10010,9 @@ static 
bool DecodePixelData(/* out */ unsigned char **out_images, for (int u = 0; u < width; u++) { tinyexr::FP16 hf; - hf.u = line_ptr[u]; + // address may not be aligned. use byte-wise copy for safety.#76 + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); tinyexr::swap2(reinterpret_cast(&hf.u)); @@ -9958,7 +10036,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, } for (int u = 0; u < width; u++) { - float val = line_ptr[u]; + float val; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(reinterpret_cast(&val)); @@ -9976,7 +10055,8 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, } for (int u = 0; u < width; u++) { - unsigned int val = line_ptr[u]; + unsigned int val; + tinyexr::cpy4(&val, line_ptr + u); tinyexr::swap4(reinterpret_cast(&val)); @@ -10153,7 +10233,7 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, // Read attributes size_t orig_size = size; - for (;;) { + for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) { if (0 == size) { return TINYEXR_ERROR_INVALID_DATA; } else if (marker[0] == '\0') { @@ -10316,8 +10396,8 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, tinyexr::swap4(reinterpret_cast(&info->chunk_count)); } } else { - // Custom attribute(up to TINYEXR_MAX_ATTRIBUTES) - if (info->attributes.size() < TINYEXR_MAX_ATTRIBUTES) { + // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) + if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { EXRAttribute attrib; #ifdef _MSC_VER strncpy_s(attrib.name, attr_name.c_str(), 255); @@ -10447,15 +10527,30 @@ static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; } - assert(info.attributes.size() < TINYEXR_MAX_ATTRIBUTES); exr_header->num_custom_attributes = static_cast(info.attributes.size()); - for (size_t i = 0; i < info.attributes.size(); i++) { - memcpy(exr_header->custom_attributes[i].name, 
info.attributes[i].name, 256); - memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, 256); - exr_header->custom_attributes[i].size = info.attributes[i].size; - // Just copy poiner - exr_header->custom_attributes[i].value = info.attributes[i].value; + if (exr_header->num_custom_attributes > 0) { + // TODO(syoyo): Report warning when # of attributes exceeds + // `TINYEXR_MAX_CUSTOM_ATTRIBUTES` + if (exr_header->num_custom_attributes > TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES; + } + + exr_header->custom_attributes = static_cast(malloc( + sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes))); + // Copy only the clamped count so writes stay inside the allocation above. + for (size_t i = 0; i < size_t(exr_header->num_custom_attributes); i++) { + memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, + 256); + memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, + 256); + exr_header->custom_attributes[i].size = info.attributes[i].size; + // Just copy pointer (shallow copy; the value buffer is not duplicated) + exr_header->custom_attributes[i].value = info.attributes[i].value; + } + + } else { + exr_header->custom_attributes = NULL; } exr_header->header_len = info.header_len; @@ -11458,7 +11553,8 @@ size_t SaveEXRImageToMemory(const EXRImage *exr_image, static_cast(pixel_data_size * y * exr_image->width) + channel_offset_list[c] * static_cast(exr_image->width))); - line_ptr[x] = f32.f; + // line_ptr[x] = f32.f; + tinyexr::cpy4(line_ptr + x, &(f32.f)); } } } else if (exr_header->requested_pixel_types[c] == @@ -11476,7 +11572,8 @@ size_t SaveEXRImageToMemory(const EXRImage *exr_image, exr_image->width) + channel_offset_list[c] * static_cast(exr_image->width))); - line_ptr[x] = val; + // line_ptr[x] = val; + tinyexr::cpy2(line_ptr + x, &val); } } } else { @@ -11502,7 +11599,8 @@ size_t SaveEXRImageToMemory(const EXRImage *exr_image, exr_image->width) + channel_offset_list[c] * static_cast(exr_image->width))); - line_ptr[x] = h16.u; + // line_ptr[x] = h16.u; + tinyexr::cpy2(line_ptr + x, 
&(h16.u)); } } } else if (exr_header->requested_pixel_types[c] == @@ -11519,7 +11617,8 @@ size_t SaveEXRImageToMemory(const EXRImage *exr_image, static_cast(pixel_data_size * y * exr_image->width) + channel_offset_list[c] * static_cast(exr_image->width))); - line_ptr[x] = val; + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); } } } else { @@ -11538,7 +11637,8 @@ size_t SaveEXRImageToMemory(const EXRImage *exr_image, static_cast(pixel_data_size * y * exr_image->width) + channel_offset_list[c] * static_cast(exr_image->width))); - line_ptr[x] = val; + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); } } } @@ -11768,7 +11868,7 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { #ifdef _MSC_VER FILE *fp = NULL; errno_t errcode = fopen_s(&fp, filename, "rb"); - if ((!errcode) || (!fp)) { + if ((0 != errcode) || (!fp)) { if (err) { (*err) = "Cannot read file."; } @@ -12103,8 +12203,10 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { if (channels[c].pixel_type == 0) { // UINT for (size_t x = 0; x < static_cast(samples_per_line); x++) { - unsigned int ui = *reinterpret_cast( + unsigned int ui; + unsigned int *src_ptr = reinterpret_cast( &sample_data.at(size_t(data_offset) + x * sizeof(int))); + tinyexr::cpy4(&ui, src_ptr); deep_image->image[c][y][x] = static_cast(ui); // @fixme } data_offset += @@ -12112,16 +12214,19 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { } else if (channels[c].pixel_type == 1) { // half for (size_t x = 0; x < static_cast(samples_per_line); x++) { tinyexr::FP16 f16; - f16.u = *reinterpret_cast( + const unsigned short *src_ptr = reinterpret_cast( &sample_data.at(size_t(data_offset) + x * sizeof(short))); + tinyexr::cpy2(&(f16.u), src_ptr); tinyexr::FP32 f32 = half_to_float(f16); deep_image->image[c][y][x] = f32.f; } data_offset += sizeof(short) * static_cast(samples_per_line); } else { // float for (size_t x = 0; x < 
static_cast(samples_per_line); x++) { - float f = *reinterpret_cast( + float f; + const float *src_ptr = reinterpret_cast( &sample_data.at(size_t(data_offset) + x * sizeof(float))); + tinyexr::cpy4(&f, src_ptr); deep_image->image[c][y][x] = f; } data_offset += sizeof(float) * static_cast(samples_per_line); @@ -12193,6 +12298,10 @@ int FreeEXRHeader(EXRHeader *exr_header) { } } + if (exr_header->custom_attributes) { + free(exr_header->custom_attributes); + } + return TINYEXR_SUCCESS; } @@ -12222,6 +12331,7 @@ int FreeEXRImage(EXRImage *exr_image) { free(exr_image->tiles[tid].images); } } + free(exr_image->tiles); } return TINYEXR_SUCCESS; diff --git a/include/bimg/bimg.h b/include/bimg/bimg.h index 8122b9e..8f62b74 100644 --- a/include/bimg/bimg.h +++ b/include/bimg/bimg.h @@ -9,7 +9,7 @@ #include // uint32_t #include // NULL -#define BIMG_API_VERSION UINT32_C(5) +#define BIMG_API_VERSION UINT32_C(6) namespace bx { @@ -404,7 +404,8 @@ namespace bimg /// bool imageConvert( - void* _dst + bx::AllocatorI* _allocator + , void* _dst , TextureFormat::Enum _dstFormat , const void* _src , TextureFormat::Enum _srcFormat @@ -426,6 +427,7 @@ namespace bimg bx::AllocatorI* _allocator , TextureFormat::Enum _dstFormat , const ImageContainer& _input + , bool _convertMips = true ); /// @@ -575,7 +577,8 @@ namespace bimg /// void imageDecodeToBgra8( - void* _dst + bx::AllocatorI* _allocator + , void* _dst , const void* _src , uint32_t _width , uint32_t _height @@ -585,7 +588,8 @@ namespace bimg /// void imageDecodeToRgba8( - void* _dst + bx::AllocatorI* _allocator + , void* _dst , const void* _src , uint32_t _width , uint32_t _height diff --git a/include/bimg/encode.h b/include/bimg/encode.h index bab8de7..b508745 100644 --- a/include/bimg/encode.h +++ b/include/bimg/encode.h @@ -24,7 +24,8 @@ namespace bimg /// void imageEncodeFromRgba8( - void* _dst + bx::AllocatorI* _allocator + , void* _dst , const void* _src , uint32_t _width , uint32_t _height diff --git a/src/image.cpp 
b/src/image.cpp index 29a7236..3afcba4 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -969,13 +969,31 @@ namespace bimg } } - bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch) + bool imageConvert(bx::AllocatorI* _allocator, void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _depth, uint32_t _srcPitch) { UnpackFn unpack = s_packUnpack[_srcFormat].unpack; PackFn pack = s_packUnpack[_dstFormat].pack; if (NULL == pack || NULL == unpack) { + switch (_dstFormat) + { + case TextureFormat::RGBA8: + imageDecodeToRgba8(_allocator, _dst, _src, _width, _height, _width*4, _srcFormat); + return true; + + case TextureFormat::BGRA8: + imageDecodeToBgra8(_allocator, _dst, _src, _width, _height, _width*4, _srcFormat); + return true; + + case TextureFormat::RGBA32F: + imageDecodeToRgba32f(_allocator, _dst, _src, _width, _height, 1, _width*16, _srcFormat); + return true; + + default: + break; + } + return false; } @@ -986,7 +1004,7 @@ namespace bimg return true; } - bool imageConvert(void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _depth) + bool imageConvert(bx::AllocatorI* _allocator, void* _dst, TextureFormat::Enum _dstFormat, const void* _src, TextureFormat::Enum _srcFormat, uint32_t _width, uint32_t _height, uint32_t _depth) { const uint32_t srcBpp = s_imageBlockInfo[_srcFormat].bitsPerPixel; @@ -996,10 +1014,10 @@ namespace bimg return true; } - return imageConvert(_dst, _dstFormat, _src, _srcFormat, _width, _height, _depth, _width*srcBpp/8); + return imageConvert(_allocator, _dst, _dstFormat, _src, _srcFormat, _width, _height, _depth, _width*srcBpp/8); } - ImageContainer* imageConvert(bx::AllocatorI* _allocator, TextureFormat::Enum _dstFormat, const ImageContainer& 
_input) + ImageContainer* imageConvert(bx::AllocatorI* _allocator, TextureFormat::Enum _dstFormat, const ImageContainer& _input, bool _convertMips) { ImageContainer* output = imageAlloc(_allocator , _dstFormat @@ -1008,14 +1026,14 @@ namespace bimg , uint16_t(_input.m_depth) , _input.m_numLayers , _input.m_cubeMap - , 1 < _input.m_numMips + , _convertMips && 1 < _input.m_numMips ); const uint16_t numSides = _input.m_numLayers * (_input.m_cubeMap ? 6 : 1); for (uint16_t side = 0; side < numSides; ++side) { - for (uint8_t lod = 0, num = _input.m_numMips; lod < num; ++lod) + for (uint8_t lod = 0, num = _convertMips ? _input.m_numMips : 1; lod < num; ++lod) { ImageMip mip; if (imageGetRawData(_input, side, lod, _input.m_data, _input.m_size, mip) ) @@ -1024,14 +1042,16 @@ namespace bimg imageGetRawData(*output, side, lod, output->m_data, output->m_size, dstMip); uint8_t* dstData = const_cast(dstMip.m_data); - bool ok = imageConvert(dstData - , _dstFormat - , mip.m_data - , mip.m_format - , mip.m_width - , mip.m_height - , mip.m_depth - ); + bool ok = imageConvert( + _allocator + , dstData + , _dstFormat + , mip.m_data + , mip.m_format + , mip.m_width + , mip.m_height + , mip.m_depth + ); BX_CHECK(ok, "Conversion from %s to %s failed!" 
, getName(_input.m_format) , getName(output->m_format) @@ -1240,19 +1260,997 @@ namespace bimg } } - static const int32_t s_etc1Mod[8][4] = - { - { 2, 8, -2, -8}, - { 5, 17, -5, -17}, - { 9, 29, -9, -29}, - { 13, 42, -13, -42}, - { 18, 60, -18, -60}, - { 24, 80, -24, -80}, - { 33, 106, -33, -106}, - { 47, 183, -47, -183}, + // BC6H, BC7 + // + // Reference: + // + // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt + // https://msdn.microsoft.com/en-us/library/windows/desktop/hh308952(v=vs.85).aspx + + static const uint16_t s_bptcP2[] = + { // 3210 0000000000 1111111111 2222222222 3333333333 + 0xcccc, // 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 + 0x8888, // 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 + 0xeeee, // 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 + 0xecc8, // 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 + 0xc880, // 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 + 0xfeec, // 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 + 0xfec8, // 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 + 0xec80, // 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 + 0xc800, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 + 0xffec, // 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 0xfe80, // 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 + 0xe800, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 + 0xffe8, // 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 0xff00, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 + 0xfff0, // 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 0xf000, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 + 0xf710, // 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 + 0x008e, // 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 + 0x7100, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 + 0x08ce, // 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 + 0x008c, // 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 + 0x7310, // 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 + 0x3100, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 0, 0, 1, 1, 0, 0 + 0x8cce, // 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 + 0x088c, // 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 + 0x3110, // 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 + 0x6666, // 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 + 0x366c, // 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 + 0x17e8, // 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 + 0x0ff0, // 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 + 0x718e, // 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 + 0x399c, // 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 + 0xaaaa, // 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 + 0xf0f0, // 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 + 0x5a5a, // 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 + 0x33cc, // 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 + 0x3c3c, // 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 + 0x55aa, // 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 + 0x9696, // 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 + 0xa55a, // 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 + 0x73ce, // 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 + 0x13c8, // 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 + 0x324c, // 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 + 0x3bdc, // 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 + 0x6996, // 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 + 0xc33c, // 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 + 0x9966, // 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 + 0x0660, // 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 + 0x0272, // 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 + 0x04e4, // 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 + 0x4e40, // 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 + 0x2720, // 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 + 0xc936, // 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 + 0x936c, // 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 + 0x39c6, // 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 + 0x639c, // 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 
+ 0x9336, // 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 + 0x9cc6, // 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 + 0x817e, // 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 + 0xe718, // 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 + 0xccf0, // 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 + 0x0fcc, // 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 + 0x7744, // 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 + 0xee22, // 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 }; - static const uint8_t s_etc2Mod[8] = { 3, 6, 11, 16, 23, 32, 41, 64 }; + static const uint32_t s_bptcP3[] = + { // 76543210 0000 1111 2222 3333 4444 5555 6666 7777 + 0xaa685050, // 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 + 0x6a5a5040, // 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 + 0x5a5a4200, // 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 + 0x5450a0a8, // 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 + 0xa5a50000, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 + 0xa0a05050, // 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 + 0x5555a0a0, // 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 + 0x5a5a5050, // 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 + 0xaa550000, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 + 0xaa555500, // 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 + 0xaaaa5500, // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 + 0x90909090, // 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 + 0x94949494, // 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 + 0xa4a4a4a4, // 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 + 0xa9a59450, // 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 + 0x2a0a4250, // 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 + 0xa5945040, // 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 + 0x0a425054, // 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 + 0xa5a5a500, // 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 + 0x55a0a0a0, // 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 + 0xa8a85454, // 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 + 
0x6a6a4040, // 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 + 0xa4a45000, // 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 + 0x1a1a0500, // 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 + 0x0050a4a4, // 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 + 0xaaa59090, // 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 + 0x14696914, // 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 + 0x69691400, // 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 + 0xa08585a0, // 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 + 0xaa821414, // 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 + 0x50a4a450, // 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 + 0x6a5a0200, // 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 + 0xa9a58000, // 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 + 0x5090a0a8, // 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 + 0xa8a09050, // 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 + 0x24242424, // 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 + 0x00aa5500, // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 + 0x24924924, // 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 + 0x24499224, // 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 + 0x50a50a50, // 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 + 0x500aa550, // 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 + 0xaaaa4444, // 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 + 0x66660000, // 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 + 0xa5a0a5a0, // 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 + 0x50a050a0, // 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 + 0x69286928, // 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 + 0x44aaaa44, // 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 + 0x66666600, // 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 + 0xaa444444, // 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 + 0x54a854a8, // 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 + 0x95809580, // 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 + 0x96969600, // 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 + 0xa85454a8, // 
0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 + 0x80959580, // 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 + 0xaa141414, // 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 + 0x96960000, // 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 + 0xaaaa1414, // 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 + 0xa05050a0, // 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 + 0xa0a5a5a0, // 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 + 0x96000000, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 + 0x40804080, // 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 + 0xa9a8a9a8, // 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 + 0xaaaaaa44, // 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + 0x2a4a5254, // 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 + }; + + static const uint8_t s_bptcA2[] = + { + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 2, 8, 2, 2, 8, 8, 15, + 2, 8, 2, 2, 8, 8, 2, 2, + 15, 15, 6, 8, 2, 8, 15, 15, + 2, 8, 2, 2, 2, 15, 15, 6, + 6, 2, 6, 8, 15, 15, 2, 2, + 15, 15, 15, 15, 15, 2, 2, 15, + }; + + static const uint8_t s_bptcA3[2][64] = + { + { + 3, 3, 15, 15, 8, 3, 15, 15, + 8, 8, 6, 6, 6, 5, 3, 3, + 3, 3, 8, 15, 3, 3, 6, 10, + 5, 8, 8, 6, 8, 5, 15, 15, + 8, 15, 3, 5, 6, 10, 8, 15, + 15, 3, 15, 5, 15, 15, 15, 15, + 3, 15, 5, 5, 5, 8, 5, 10, + 5, 10, 8, 13, 15, 12, 3, 3, + }, + { + 15, 8, 8, 3, 15, 15, 3, 8, + 15, 15, 15, 15, 15, 15, 15, 8, + 15, 8, 15, 3, 15, 8, 15, 8, + 3, 15, 6, 10, 15, 15, 10, 8, + 15, 3, 15, 10, 10, 8, 9, 10, + 6, 15, 8, 15, 3, 6, 6, 8, + 15, 3, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 3, 15, 15, 8, + }, + }; + + static const uint8_t s_bptcFactors[3][16] = + { + { 0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }, + }; + + struct BitReader + { + BitReader(const uint8_t* _data, uint16_t _bitPos = 0) + : m_data(_data) + , m_bitPos(_bitPos) + { + } + + uint16_t read(uint8_t _numBits) + 
{ + const uint16_t pos = m_bitPos / 8; + const uint16_t shift = m_bitPos & 7; + uint32_t data = 0; + bx::memCopy(&data, &m_data[pos], bx::min(4, 16-pos) ); + m_bitPos += _numBits; + return uint16_t( (data >> shift) & ( (1 << _numBits)-1) ); + } + + uint16_t peek(uint16_t _offset, uint8_t _numBits) + { + const uint16_t bitPos = m_bitPos + _offset; + const uint16_t shift = bitPos & 7; + uint16_t pos = bitPos / 8; + uint32_t data = 0; + bx::memCopy(&data, &m_data[pos], bx::min(4, 16-pos) ); + return uint8_t( (data >> shift) & ( (1 << _numBits)-1) ); + } + + const uint8_t* m_data; + uint16_t m_bitPos; + }; + + uint16_t bc6hUnquantize(uint16_t _value, bool _signed, uint8_t _endpointBits) + { + const uint16_t maxValue = 1<<(_endpointBits-1); + + if (_signed) + { + if (_endpointBits >= 16) + { + return _value; + } + + const bool sign = !!(_value & 0x8000); + _value &= 0x7fff; + + uint16_t unq; + + if (0 == _value) + { + unq = 0; + } + else if (_value >= maxValue-1) + { + unq = 0x7fff; + } + else + { + unq = ( (_value<<15) + 0x4000) >> (_endpointBits-1); + } + + return sign ? 
-unq : unq; + } + + if (_endpointBits >= 15) + { + return _value; + } + + if (0 == _value) + { + return 0; + } + + if (_value == maxValue) + { + return UINT16_MAX; + } + + return ( (_value<<15) + 0x4000) >> (_endpointBits-1); + } + + uint16_t bc6hUnquantizeFinal(uint16_t _value, bool _signed) + { + if (_signed) + { + const uint16_t sign = _value & 0x8000; + _value &= 0x7fff; + + return ( (_value * 31) >> 5) | sign; + } + + return (_value * 31) >> 6; + } + + uint16_t signExtend(uint16_t _value, uint8_t _numBits) + { + const uint16_t mask = 1 << (_numBits - 1); + const uint16_t result = (_value ^ mask) - mask; + + return result; + } + + struct Bc6hModeInfo + { + uint8_t transformed; + uint8_t partitionBits; + uint8_t endpointBits; + uint8_t deltaBits[3]; + }; + + static const Bc6hModeInfo s_bc6hModeInfo[] = + { // +--------------------------- transformed + // | +------------------------ partition bits + // | | +--------------------- endpoint bits + // | | | +-------------- delta bits + { 1, 5, 10, { 5, 5, 5 } }, // 00 2-bits + { 1, 5, 7, { 6, 6, 6 } }, // 01 + { 1, 5, 11, { 5, 4, 4 } }, // 00010 5-bits + { 0, 0, 10, { 10, 10, 10 } }, // 00011 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 11, { 4, 5, 4 } }, // 00110 + { 1, 0, 11, { 9, 9, 9 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 11, { 4, 4, 5 } }, // 00010 + { 1, 0, 12, { 8, 8, 8 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 9, { 5, 5, 5 } }, // 00010 + { 1, 0, 16, { 4, 4, 4 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 8, { 6, 5, 5 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 8, { 5, 6, 5 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 8, { 5, 5, 6 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 
0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 5, 6, { 6, 6, 6 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + }; + + void decodeBlockBc6h(uint16_t _dst[16*3], const uint8_t _src[16], bool _signed) + { + BitReader bit(_src); + + uint8_t mode = uint8_t(bit.read(2) ); + if (mode & 2) + { + // 5-bit mode + mode |= bit.read(3) << 2; + } + + const Bc6hModeInfo& mi = s_bc6hModeInfo[mode]; + if (0 == mi.endpointBits) + { + bx::memSet(_dst, 0, 16*3*2); + return; + } + + uint16_t epR[4] = { /* rw, rx, ry, rz */ }; + uint16_t epG[4] = { /* gw, gx, gy, gz */ }; + uint16_t epB[4] = { /* bw, bx, by, bz */ }; + + switch (mode) + { + case 0: + epG[2] |= bit.read( 1) << 4; + epB[2] |= bit.read( 1) << 4; + epB[3] |= bit.read( 1) << 4; + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 5) << 0; + epG[3] |= bit.read( 1) << 4; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 3; + break; + + case 1: + epG[2] |= bit.read( 1) << 5; + epG[3] |= bit.read( 1) << 4; + epG[3] |= bit.read( 1) << 5; + epR[0] |= bit.read( 7) << 0; + epB[3] |= bit.read( 1) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 1) << 4; + epG[0] |= bit.read( 7) << 0; + epB[2] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 2; + epG[2] |= bit.read( 1) << 4; + epB[0] |= bit.read( 7) << 0; + epB[3] |= bit.read( 1) << 3; + epB[3] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 4; + epR[1] |= bit.read( 6) << 0; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 6) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 6) << 0; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 6) << 0; + epR[3] |= bit.read( 6) << 0; + break; + + case 2: 
+ epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 5) << 0; + epR[0] |= bit.read( 1) << 10; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 4) << 0; + epG[0] |= bit.read( 1) << 10; + epB[3] |= bit.read( 1) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 4) << 0; + epB[0] |= bit.read( 1) << 10; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 3; + break; + + case 3: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(10) << 0; + epG[1] |= bit.read(10) << 0; + epB[1] |= bit.read(10) << 0; + break; + + case 6: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 4) << 0; + epR[0] |= bit.read( 1) << 10; + epG[3] |= bit.read( 1) << 4; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 5) << 0; + epG[0] |= bit.read( 1) << 10; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 4) << 0; + epB[0] |= bit.read( 1) << 10; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 4) << 0; + epB[3] |= bit.read( 1) << 0; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 4) << 0; + epG[2] |= bit.read( 1) << 4; + epB[3] |= bit.read( 1) << 3; + break; + + case 7: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 9) << 0; + epR[0] |= bit.read( 1) << 10; + epG[1] |= bit.read( 9) << 0; + epG[0] |= bit.read( 1) << 10; + epB[1] |= bit.read( 9) << 0; + epB[0] |= bit.read( 1) << 10; + break; + + case 10: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 4) << 0; + epR[0] |= bit.read( 1) << 10; + epB[2] |= bit.read( 1) << 4; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 4) << 0; + epG[0] |= 
bit.read( 1) << 10; + epB[3] |= bit.read( 1) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 5) << 0; + epB[0] |= bit.read( 1) << 10; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 4) << 0; + epB[3] |= bit.read( 1) << 1; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 4) << 0; + epB[3] |= bit.read( 1) << 4; + epB[3] |= bit.read( 1) << 3; + break; + + case 11: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 8) << 0; + epR[0] |= bit.read( 1) << 11; + epR[0] |= bit.read( 1) << 10; + epG[1] |= bit.read( 8) << 0; + epG[0] |= bit.read( 1) << 11; + epG[0] |= bit.read( 1) << 10; + epB[1] |= bit.read( 8) << 0; + epB[0] |= bit.read( 1) << 11; + epB[0] |= bit.read( 1) << 10; + break; + + case 14: + epR[0] |= bit.read( 9) << 0; + epB[2] |= bit.read( 1) << 4; + epG[0] |= bit.read( 9) << 0; + epG[2] |= bit.read( 1) << 4; + epB[0] |= bit.read( 9) << 0; + epB[3] |= bit.read( 1) << 4; + epR[1] |= bit.read( 5) << 0; + epG[3] |= bit.read( 1) << 4; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 3; + break; + + case 15: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read( 4) << 0; + epR[0] |= bit.read( 1) << 15; + epR[0] |= bit.read( 1) << 14; + epR[0] |= bit.read( 1) << 13; + epR[0] |= bit.read( 1) << 12; + epR[0] |= bit.read( 1) << 11; + epR[0] |= bit.read( 1) << 10; + epG[1] |= bit.read( 4) << 0; + epG[0] |= bit.read( 1) << 15; + epG[0] |= bit.read( 1) << 14; + epG[0] |= bit.read( 1) << 13; + epG[0] |= bit.read( 1) << 12; + epG[0] |= bit.read( 1) << 11; + epG[0] |= bit.read( 1) << 10; + epB[1] |= bit.read( 4) << 0; + epB[0] |= bit.read( 1) << 15; + 
epB[0] |= bit.read( 1) << 14; + epB[0] |= bit.read( 1) << 13; + epB[0] |= bit.read( 1) << 12; + epB[0] |= bit.read( 1) << 11; + epB[0] |= bit.read( 1) << 10; + break; + + case 18: + epR[0] |= bit.read( 8) << 0; + epG[3] |= bit.read( 1) << 4; + epB[2] |= bit.read( 1) << 4; + epG[0] |= bit.read( 8) << 0; + epB[3] |= bit.read( 1) << 2; + epG[2] |= bit.read( 1) << 4; + epB[0] |= bit.read( 8) << 0; + epB[3] |= bit.read( 1) << 3; + epB[3] |= bit.read( 1) << 4; + epR[1] |= bit.read( 6) << 0; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 6) << 0; + epR[3] |= bit.read( 6) << 0; + break; + + case 22: + epR[0] |= bit.read( 8) << 0; + epB[3] |= bit.read( 1) << 0; + epB[2] |= bit.read( 1) << 4; + epG[0] |= bit.read( 8) << 0; + epG[2] |= bit.read( 1) << 5; + epG[2] |= bit.read( 1) << 4; + epB[0] |= bit.read( 8) << 0; + epG[3] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 4; + epR[1] |= bit.read( 5) << 0; + epG[3] |= bit.read( 1) << 4; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 6) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 3; + break; + + case 26: + epR[0] |= bit.read( 8) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 1) << 4; + epG[0] |= bit.read( 8) << 0; + epB[2] |= bit.read( 1) << 5; + epG[2] |= bit.read( 1) << 4; + epB[0] |= bit.read( 8) << 0; + epB[3] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 4; + epR[1] |= bit.read( 5) << 0; + epG[3] |= bit.read( 1) << 4; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 6) << 0; + epB[2] |= bit.read( 4) << 0; + epR[2] 
|= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 2; + epR[3] |= bit.read( 5) << 0; + epB[3] |= bit.read( 1) << 3; + break; + + case 30: + epR[0] |= bit.read( 6) << 0; + epG[3] |= bit.read( 1) << 4; + epB[3] |= bit.read( 1) << 0; + epB[3] |= bit.read( 1) << 1; + epB[2] |= bit.read( 1) << 4; + epG[0] |= bit.read( 6) << 0; + epG[2] |= bit.read( 1) << 5; + epB[2] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 2; + epG[2] |= bit.read( 1) << 4; + epB[0] |= bit.read( 6) << 0; + epG[3] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 3; + epB[3] |= bit.read( 1) << 5; + epB[3] |= bit.read( 1) << 4; + epR[1] |= bit.read( 6) << 0; + epG[2] |= bit.read( 4) << 0; + epG[1] |= bit.read( 6) << 0; + epG[3] |= bit.read( 4) << 0; + epB[1] |= bit.read( 6) << 0; + epB[2] |= bit.read( 4) << 0; + epR[2] |= bit.read( 6) << 0; + epR[3] |= bit.read( 6) << 0; + break; + + default: + break; + } + + if (_signed) + { + epR[0] = signExtend(epR[0], mi.endpointBits); + epG[0] = signExtend(epG[0], mi.endpointBits); + epB[0] = signExtend(epB[0], mi.endpointBits); + } + + const uint8_t numSubsets = !!mi.partitionBits + 1; + + for (uint8_t ii = 1, num = numSubsets*2; ii < num; ++ii) + { + if (_signed + || mi.transformed) + { + epR[ii] = signExtend(epR[ii], mi.deltaBits[0]); + epG[ii] = signExtend(epG[ii], mi.deltaBits[1]); + epB[ii] = signExtend(epB[ii], mi.deltaBits[2]); + } + + if (mi.transformed) + { + const uint16_t mask = (1<> idx) & 1; + indexAnchor = subsetIndex ? 
s_bptcA2[partitionSetIdx] : 0; + } + + const uint8_t anchor = idx == indexAnchor; + const uint8_t num = indexBits - anchor; + const uint8_t index = (uint8_t)bit.read(num); + + const uint8_t fc = factors[index]; + const uint8_t fca = 64 - fc; + const uint8_t fcb = fc; + + subsetIndex *= 2; + uint16_t rr = bc6hUnquantizeFinal( (epR[subsetIndex]*fca + epR[subsetIndex + 1]*fcb + 32) >> 6, _signed); + uint16_t gg = bc6hUnquantizeFinal( (epG[subsetIndex]*fca + epG[subsetIndex + 1]*fcb + 32) >> 6, _signed); + uint16_t bb = bc6hUnquantizeFinal( (epB[subsetIndex]*fca + epB[subsetIndex + 1]*fcb + 32) >> 6, _signed); + + uint16_t* rgba = &_dst[idx*3]; + rgba[0] = rr; + rgba[1] = gg; + rgba[2] = bb; + } + } + } + + void decodeBlockBc6h(float _dst[16*4], const uint8_t _src[16]) + { + uint16_t tmp[16*3]; + + decodeBlockBc6h(tmp, _src, true); + + for (uint32_t ii = 0; ii < 16; ++ii) + { + _dst[ii*4+0] = bx::halfToFloat(tmp[ii*3+0]); + _dst[ii*4+1] = bx::halfToFloat(tmp[ii*3+1]); + _dst[ii*4+2] = bx::halfToFloat(tmp[ii*3+2]); + _dst[ii*4+3] = 1.0f; + } + } + + struct Bc7ModeInfo + { + uint8_t numSubsets; + uint8_t partitionBits; + uint8_t rotationBits; + uint8_t indexSelectionBits; + uint8_t colorBits; + uint8_t alphaBits; + uint8_t endpointPBits; + uint8_t sharedPBits; + uint8_t indexBits[2]; + }; + + static const Bc7ModeInfo s_bp7ModeInfo[] = + { // +---------------------------- num subsets + // | +------------------------- partition bits + // | | +---------------------- rotation bits + // | | | +------------------- index selection bits + // | | | | +---------------- color bits + // | | | | | +------------- alpha bits + // | | | | | | +---------- endpoint P-bits + // | | | | | | | +------- shared P-bits + // | | | | | | | | +-- 2x index bits + { 3, 4, 0, 0, 4, 0, 1, 0, { 3, 0 } }, // 0 + { 2, 6, 0, 0, 6, 0, 0, 1, { 3, 0 } }, // 1 + { 3, 6, 0, 0, 5, 0, 0, 0, { 2, 0 } }, // 2 + { 2, 6, 0, 0, 7, 0, 1, 0, { 2, 0 } }, // 3 + { 1, 0, 2, 1, 5, 6, 0, 0, { 2, 3 } }, // 4 + { 1, 0, 2, 0, 
7, 8, 0, 0, { 2, 2 } }, // 5 + { 1, 0, 0, 0, 7, 7, 1, 0, { 4, 0 } }, // 6 + { 2, 6, 0, 0, 5, 5, 1, 0, { 2, 0 } }, // 7 + }; + + void decodeBlockBc7(uint8_t _dst[16*4], const uint8_t _src[16]) + { + BitReader bit(_src); + + uint8_t mode = 0; + for (; mode < 8 && 0 == bit.read(1); ++mode) + { + } + + if (mode == 8) + { + bx::memSet(_dst, 0, 16*4); + return; + } + + const Bc7ModeInfo& mi = s_bp7ModeInfo[mode]; + const uint8_t modePBits = 0 != mi.endpointPBits + ? mi.endpointPBits + : mi.sharedPBits + ; + + const uint8_t partitionSetIdx = uint8_t(bit.read(mi.partitionBits) ); + const uint8_t rotationMode = uint8_t(bit.read(mi.rotationBits) ); + const uint8_t indexSelectionMode = uint8_t(bit.read(mi.indexSelectionBits) ); + + uint8_t epR[6]; + uint8_t epG[6]; + uint8_t epB[6]; + uint8_t epA[6]; + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epR[ii*2+0] = uint8_t(bit.read(mi.colorBits) << modePBits); + epR[ii*2+1] = uint8_t(bit.read(mi.colorBits) << modePBits); + } + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epG[ii*2+0] = uint8_t(bit.read(mi.colorBits) << modePBits); + epG[ii*2+1] = uint8_t(bit.read(mi.colorBits) << modePBits); + } + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epB[ii*2+0] = uint8_t(bit.read(mi.colorBits) << modePBits); + epB[ii*2+1] = uint8_t(bit.read(mi.colorBits) << modePBits); + } + + if (mi.alphaBits) + { + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epA[ii*2+0] = uint8_t(bit.read(mi.alphaBits) << modePBits); + epA[ii*2+1] = uint8_t(bit.read(mi.alphaBits) << modePBits); + } + } + else + { + bx::memSet(epA, 0xff, 6); + } + + if (0 != modePBits) + { + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + const uint8_t pda = uint8_t( bit.read(modePBits) ); + const uint8_t pdb = uint8_t(0 == mi.sharedPBits ? 
bit.read(modePBits) : pda); + + epR[ii*2+0] |= pda; + epR[ii*2+1] |= pdb; + epG[ii*2+0] |= pda; + epG[ii*2+1] |= pdb; + epB[ii*2+0] |= pda; + epB[ii*2+1] |= pdb; + epA[ii*2+0] |= pda; + epA[ii*2+1] |= pdb; + } + } + + const uint8_t colorBits = mi.colorBits + modePBits; + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epR[ii*2+0] = bitRangeConvert(epR[ii*2+0], colorBits, 8); + epR[ii*2+1] = bitRangeConvert(epR[ii*2+1], colorBits, 8); + epG[ii*2+0] = bitRangeConvert(epG[ii*2+0], colorBits, 8); + epG[ii*2+1] = bitRangeConvert(epG[ii*2+1], colorBits, 8); + epB[ii*2+0] = bitRangeConvert(epB[ii*2+0], colorBits, 8); + epB[ii*2+1] = bitRangeConvert(epB[ii*2+1], colorBits, 8); + } + + if (mi.alphaBits) + { + const uint8_t alphaBits = mi.alphaBits + modePBits; + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epA[ii*2+0] = bitRangeConvert(epA[ii*2+0], alphaBits, 8); + epA[ii*2+1] = bitRangeConvert(epA[ii*2+1], alphaBits, 8); + } + } + + const bool hasIndexBits1 = 0 != mi.indexBits[1]; + + const uint8_t* factors[] = + { + s_bptcFactors[mi.indexBits[0]-2], + hasIndexBits1 ? s_bptcFactors[mi.indexBits[1]-2] : factors[0], + }; + + uint16_t offset[2] = + { + 0, + uint16_t(mi.numSubsets*(16*mi.indexBits[0]-1) ), + }; + + for (uint8_t yy = 0; yy < 4; ++yy) + { + for (uint8_t xx = 0; xx < 4; ++xx) + { + const uint8_t idx = yy*4+xx; + + uint8_t subsetIndex = 0; + uint8_t indexAnchor = 0; + switch (mi.numSubsets) + { + case 2: + subsetIndex = (s_bptcP2[partitionSetIdx] >> idx) & 1; + indexAnchor = 0 != subsetIndex ? s_bptcA2[partitionSetIdx] : 0; + break; + + case 3: + subsetIndex = (s_bptcP3[partitionSetIdx] >> (2*idx) ) & 3; + indexAnchor = 0 != subsetIndex ? s_bptcA3[subsetIndex-1][partitionSetIdx] : 0; + break; + + default: + break; + } + + const uint8_t anchor = idx == indexAnchor; + const uint8_t num[2] = + { + uint8_t( mi.indexBits[0] - anchor ), + uint8_t(hasIndexBits1 ? 
mi.indexBits[1] - anchor : 0), + }; + + const uint8_t index[2] = + { + (uint8_t)bit.peek(offset[0], num[0]), + hasIndexBits1 ? (uint8_t)bit.peek(offset[1], num[1]) : index[0], + }; + + offset[0] += num[0]; + offset[1] += num[1]; + + const uint8_t fc = factors[ indexSelectionMode][index[ indexSelectionMode] ]; + const uint8_t fa = factors[!indexSelectionMode][index[!indexSelectionMode] ]; + + const uint8_t fca = 64 - fc; + const uint8_t fcb = fc; + const uint8_t faa = 64 - fa; + const uint8_t fab = fa; + + subsetIndex *= 2; + uint8_t rr = uint8_t(uint16_t(epR[subsetIndex]*fca + epR[subsetIndex + 1]*fcb + 32) >> 6); + uint8_t gg = uint8_t(uint16_t(epG[subsetIndex]*fca + epG[subsetIndex + 1]*fcb + 32) >> 6); + uint8_t bb = uint8_t(uint16_t(epB[subsetIndex]*fca + epB[subsetIndex + 1]*fcb + 32) >> 6); + uint8_t aa = uint8_t(uint16_t(epA[subsetIndex]*faa + epA[subsetIndex + 1]*fab + 32) >> 6); + + switch (rotationMode) + { + case 1: bx::xchg(aa, rr); break; + case 2: bx::xchg(aa, gg); break; + case 3: bx::xchg(aa, bb); break; + default: break; + }; + + uint8_t* bgra = &_dst[idx*4]; + bgra[0] = bb; + bgra[1] = gg; + bgra[2] = rr; + bgra[3] = aa; + } + } + } + + static const int32_t s_etc1Mod[8][4] = + { + { 2, 8, -2, -8 }, + { 5, 17, -5, -17 }, + { 9, 29, -9, -29 }, + { 13, 42, -13, -42 }, + { 18, 60, -18, -60 }, + { 24, 80, -24, -80 }, + { 33, 106, -33, -106 }, + { 47, 183, -47, -183 }, + }; + + static const uint8_t s_etc2Mod[] = { 3, 6, 11, 16, 23, 32, 41, 64 }; uint8_t uint8_sat(int32_t _a) { @@ -1893,10 +2891,10 @@ namespace bimg const uint8_t numMips = _hasMips ? 
imageGetNumMips(_format, _width, _height, _depth) : 1; uint32_t size = imageGetSize(NULL, _width, _height, _depth, _cubeMap, _hasMips, _numLayers, _format); - ImageContainer* imageContainer = (ImageContainer*)BX_ALLOC(_allocator, size + sizeof(ImageContainer) ); + ImageContainer* imageContainer = (ImageContainer*)BX_ALIGNED_ALLOC(_allocator, size + BX_ALIGN_16(sizeof(ImageContainer) ), 16); imageContainer->m_allocator = _allocator; - imageContainer->m_data = imageContainer + 1; + imageContainer->m_data = bx::alignPtr(imageContainer + 1, 0, 16); imageContainer->m_format = _format; imageContainer->m_orientation = Orientation::R0; imageContainer->m_size = size; @@ -1922,7 +2920,7 @@ namespace bimg void imageFree(ImageContainer* _imageContainer) { - BX_FREE(_imageContainer->m_allocator, _imageContainer); + BX_ALIGNED_FREE(_imageContainer->m_allocator, _imageContainer, 16); } // DDS @@ -2911,18 +3909,18 @@ namespace bimg { uint32_t size = imageGetSize(NULL, uint16_t(_width), uint16_t(_height), 0, false, false, 1, TextureFormat::RGBA8); void* temp = BX_ALLOC(_allocator, size); - imageDecodeToRgba8(temp, _src, _width, _height, _width*4, _srcFormat); - imageConvert(dst, TextureFormat::R8, temp, TextureFormat::RGBA8, _width, _height, 1, _width*4); + imageDecodeToRgba8(_allocator, temp, _src, _width, _height, _width*4, _srcFormat); + imageConvert(_allocator, dst, TextureFormat::R8, temp, TextureFormat::RGBA8, _width, _height, 1, _width*4); BX_FREE(_allocator, temp); } else { - imageConvert(dst, TextureFormat::R8, src, _srcFormat, _width, _height, 1, srcPitch); + imageConvert(_allocator, dst, TextureFormat::R8, src, _srcFormat, _width, _height, 1, srcPitch); } } } - void imageDecodeToBgra8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _dstPitch, TextureFormat::Enum _srcFormat) + void imageDecodeToBgra8(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _dstPitch, TextureFormat::Enum _srcFormat) { 
const uint8_t* src = (const uint8_t*)_src; uint8_t* dst = (uint8_t*)_dst; @@ -3034,6 +4032,40 @@ namespace bimg } break; + case TextureFormat::BC6H: + { + ImageContainer* rgba32f = imageAlloc(_allocator + , TextureFormat::RGBA32F + , uint16_t(_width) + , uint16_t(_height) + , uint16_t(1) + , 1 + , false + , false + ); + imageDecodeToRgba32f(_allocator, rgba32f->m_data, _src, _width, _height, 1, _width*16, _srcFormat); + imageConvert(_allocator, _dst, TextureFormat::BGRA8, rgba32f->m_data, TextureFormat::RGBA32F, _width, _height, 1, _width*16); + imageFree(rgba32f); + } + break; + + case TextureFormat::BC7: + for (uint32_t yy = 0; yy < height; ++yy) + { + for (uint32_t xx = 0; xx < width; ++xx) + { + decodeBlockBc7(temp, src); + src += 16; + + uint8_t* block = &dst[yy*_dstPitch*4 + xx*16]; + bx::memCopy(&block[0*_dstPitch], &temp[ 0], 16); + bx::memCopy(&block[1*_dstPitch], &temp[16], 16); + bx::memCopy(&block[2*_dstPitch], &temp[32], 16); + bx::memCopy(&block[3*_dstPitch], &temp[48], 16); + } + } + break; + case TextureFormat::ETC1: case TextureFormat::ETC2: for (uint32_t yy = 0; yy < height; ++yy) @@ -3133,7 +4165,7 @@ namespace bimg { const uint32_t srcBpp = s_imageBlockInfo[_srcFormat].bitsPerPixel; const uint32_t srcPitch = _width * srcBpp / 8; - if (!imageConvert(_dst, TextureFormat::BGRA8, _src, _srcFormat, _width, _height, 1, srcPitch) ) + if (!imageConvert(_allocator, _dst, TextureFormat::BGRA8, _src, _srcFormat, _width, _height, 1, srcPitch) ) { // Failed to convert, just make ugly red-yellow checkerboard texture. 
imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xffff0000), UINT32_C(0xffffff00) ); @@ -3143,7 +4175,7 @@ namespace bimg } } - void imageDecodeToRgba8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _dstPitch, TextureFormat::Enum _srcFormat) + void imageDecodeToRgba8(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _dstPitch, TextureFormat::Enum _srcFormat) { switch (_srcFormat) { @@ -3165,7 +4197,7 @@ namespace bimg default: { const uint32_t srcPitch = _width * 4; - imageDecodeToBgra8(_dst, _src, _width, _height, _dstPitch, _srcFormat); + imageDecodeToBgra8(_allocator, _dst, _src, _width, _height, _dstPitch, _srcFormat); imageSwizzleBgra8(_dst, _dstPitch, _width, _height, _dst, srcPitch); } break; @@ -3214,7 +4246,7 @@ namespace bimg const uint8_t* src = (const uint8_t*)_src; using namespace bx; - const simd128_t unpack = simd_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f); + const simd128_t unpack = simd_ld(1.0f/256.0f, 1.0f/256.0f/256.0f, 1.0f/65536.0f/256.0f, 1.0f/16777216.0f/256.0f); const simd128_t umask = simd_ild(0xff, 0xff00, 0xff0000, 0xff000000); const simd128_t wflip = simd_ild(0, 0, 0, 0x80000000); const simd128_t wadd = simd_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f); @@ -3284,6 +4316,31 @@ namespace bimg } break; + case TextureFormat::BC6H: + { + uint32_t width = _width/4; + uint32_t height = _height/4; + + const uint8_t* srcData = src; + + for (uint32_t yy = 0; yy < height; ++yy) + { + for (uint32_t xx = 0; xx < width; ++xx) + { + float tmp[16*4]; + decodeBlockBc6h(tmp, srcData); + srcData += 16; + + uint8_t* block = (uint8_t*)&dst[yy*_dstPitch*4 + xx*64]; + bx::memCopy(&block[0*_dstPitch], &tmp[ 0], 64); + bx::memCopy(&block[1*_dstPitch], &tmp[16], 64); + bx::memCopy(&block[2*_dstPitch], &tmp[32], 64); + bx::memCopy(&block[3*_dstPitch], &tmp[48], 64); + } + } + } + break; + case TextureFormat::RGBA32F: bx::memCopy(dst, src, _dstPitch*_height); break; @@ 
-3293,13 +4350,13 @@ namespace bimg { uint32_t size = imageGetSize(NULL, uint16_t(_width), uint16_t(_height), 0, false, false, 1, TextureFormat::RGBA8); void* temp = BX_ALLOC(_allocator, size); - imageDecodeToRgba8(temp, src, _width, _height, _width*4, _srcFormat); + imageDecodeToRgba8(_allocator, temp, src, _width, _height, _width*4, _srcFormat); imageRgba8ToRgba32f(dst, _width, _height, _width*4, temp); BX_FREE(_allocator, temp); } else { - imageConvert(dst, TextureFormat::RGBA32F, src, _srcFormat, _width, _height, 1, srcPitch); + imageConvert(_allocator, dst, TextureFormat::RGBA32F, src, _srcFormat, _width, _height, 1, srcPitch); } break; } diff --git a/src/image_encode.cpp b/src/image_encode.cpp index f7abf4c..3e29ef1 100644 --- a/src/image_encode.cpp +++ b/src/image_encode.cpp @@ -35,7 +35,7 @@ namespace bimg }; BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_squishQuality) ); - void imageEncodeFromRgba8(void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err) + void imageEncodeFromRgba8(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err) { const uint8_t* src = (const uint8_t*)_src; uint8_t* dst = (uint8_t*)_dst; @@ -131,7 +131,7 @@ namespace bimg break; default: - if (!imageConvert(dst, _format, src, TextureFormat::RGBA8, _width, _height, 1) ) + if (!imageConvert(_allocator, dst, _format, src, TextureFormat::RGBA8, _width, _height, 1) ) { BX_ERROR_SET(_err, BIMG_ERROR, "Unable to convert between input/output formats!"); } @@ -157,10 +157,10 @@ namespace bimg break; default: - if (!imageConvert(_dst, _dstFormat, _src, TextureFormat::RGBA32F, _width, _height, _depth) ) + if (!imageConvert(_allocator, _dst, _dstFormat, _src, TextureFormat::RGBA32F, _width, _height, _depth) ) { uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, 
_width*_height*_depth*4); - if (imageConvert(temp, TextureFormat::RGBA8, _src, TextureFormat::RGBA32F, _width, _height, _depth) ) + if (imageConvert(_allocator, temp, TextureFormat::RGBA8, _src, TextureFormat::RGBA32F, _width, _height, _depth) ) { for (uint32_t zz = 0; zz < _depth; ++zz) { @@ -183,7 +183,7 @@ namespace bimg } } - imageEncodeFromRgba8(_dst, temp, _width, _height, _depth, _dstFormat, _quality, _err); + imageEncodeFromRgba8(_allocator, _dst, temp, _width, _height, _depth, _dstFormat, _quality, _err); } else { @@ -211,8 +211,8 @@ namespace bimg case bimg::TextureFormat::PTC14A: { uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, _width*_height*_depth*4); - imageDecodeToRgba8(temp, _src, _width, _height, _width*4, _srcFormat); - imageEncodeFromRgba8(_dst, temp, _width, _height, _depth, _dstFormat, _quality, _err); + imageDecodeToRgba8(_allocator, temp, _src, _width, _height, _width*4, _srcFormat); + imageEncodeFromRgba8(_allocator, _dst, temp, _width, _height, _depth, _dstFormat, _quality, _err); BX_FREE(_allocator, temp); } break; diff --git a/tools/texturec/texturec.cpp b/tools/texturec/texturec.cpp index 972891c..58321cb 100644 --- a/tools/texturec/texturec.cpp +++ b/tools/texturec/texturec.cpp @@ -26,7 +26,7 @@ #include #define BIMG_TEXTUREC_VERSION_MAJOR 1 -#define BIMG_TEXTUREC_VERSION_MINOR 14 +#define BIMG_TEXTUREC_VERSION_MINOR 15 struct Options { @@ -145,8 +145,8 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData if (NULL != input) { - const bimg::TextureFormat::Enum inputFormat = input->m_format; - bimg::TextureFormat::Enum outputFormat = input->m_format; + bimg::TextureFormat::Enum inputFormat = input->m_format; + bimg::TextureFormat::Enum outputFormat = input->m_format; if (bimg::TextureFormat::Count != _options.format) { @@ -211,7 +211,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData if (needResize) { - bimg::ImageContainer* src = bimg::imageConvert(_allocator, 
bimg::TextureFormat::RGBA32F, *input); + bimg::ImageContainer* src = bimg::imageConvert(_allocator, bimg::TextureFormat::RGBA32F, *input, false); bimg::ImageContainer* dst = bimg::imageAlloc( _allocator @@ -229,6 +229,18 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData bimg::imageFree(src); bimg::imageFree(input); + if (bimg::isCompressed(inputFormat) ) + { + if (inputFormat == bimg::TextureFormat::BC6H) + { + inputFormat = bimg::TextureFormat::RGBA32F; + } + else + { + inputFormat = bimg::TextureFormat::RGBA8; + } + } + input = bimg::imageConvert(_allocator, inputFormat, *dst); bimg::imageFree(dst); } @@ -396,7 +408,7 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData BX_FREE(_allocator, rgbaDst); } // HDR - else if ( (!bimg::isCompressed(input->m_format) && 8 != inputBlockInfo.rBits) + else if ( (!bimg::isCompressed(inputFormat) && 8 != inputBlockInfo.rBits) || outputFormat == bimg::TextureFormat::BC6H || outputFormat == bimg::TextureFormat::BC7 ) @@ -559,7 +571,9 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData temp = BX_ALLOC(_allocator, size); uint8_t* rgba = (uint8_t*)temp; - bimg::imageDecodeToRgba8(rgba + bimg::imageDecodeToRgba8( + _allocator + , rgba , mip.m_data , mip.m_width , mip.m_height @@ -600,7 +614,9 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData bimg::imageGetRawData(*output, side, 0, output->m_data, output->m_size, dstMip); dstData = const_cast(dstMip.m_data); - bimg::imageEncodeFromRgba8(dstData + bimg::imageEncodeFromRgba8( + _allocator + , dstData , rgba , dstMip.m_width , dstMip.m_height @@ -647,7 +663,9 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData bimg::imageGetRawData(*output, side, lod, output->m_data, output->m_size, dstMip); dstData = const_cast(dstMip.m_data); - bimg::imageEncodeFromRgba8(dstData + bimg::imageEncodeFromRgba8( + _allocator + , dstData , rgba , 
dstMip.m_width , dstMip.m_height @@ -660,7 +678,9 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData if (NULL != ref) { - bimg::imageDecodeToRgba8(rgba + bimg::imageDecodeToRgba8( + _allocator + , rgba , output->m_data , mip.m_width , mip.m_height @@ -776,6 +796,30 @@ void help(const char* _str, const bx::Error& _err) help(str.c_str(), false); } +class AlignedAllocator : public bx::AllocatorI +{ +public: + AlignedAllocator(bx::AllocatorI* _allocator, size_t _minAlignment) + : m_allocator(_allocator) + , m_minAlignment(_minAlignment) + { + } + + virtual void* realloc( + void* _ptr + , size_t _size + , size_t _align + , const char* _file + , uint32_t _line + ) + { + return m_allocator->realloc(_ptr, _size, bx::max(_align, m_minAlignment), _file, _line); + } + + bx::AllocatorI* m_allocator; + size_t m_minAlignment; +}; + int main(int _argc, const char* _argv[]) { bx::CommandLine cmdLine(_argc, _argv); @@ -927,7 +971,9 @@ int main(int _argc, const char* _argv[]) return bx::kExitFailure; } - bx::DefaultAllocator allocator; + bx::DefaultAllocator defaultAllocator; + AlignedAllocator allocator(&defaultAllocator, 16); + uint8_t* inputData = (uint8_t*)BX_ALLOC(&allocator, inputSize); bx::read(&reader, inputData, inputSize, &err);