From 8c3aabc3c1536a914a66a73c9865bc87ceade839 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?=
 =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?=
 <branimirkaradzic@gmail.com>
Date: Thu, 10 Nov 2022 17:27:01 -0800
Subject: [PATCH] Updated astc-encoder.

---
 3rdparty/astc-encoder/include/astcenc.h       |  47 ++++-
 .../source/astcenc_color_quantize.cpp         |  90 +++-----
 .../source/astcenc_color_unquantize.cpp       |  11 +-
 .../source/astcenc_compress_symbolic.cpp      |  68 ++++--
 .../astc-encoder/source/astcenc_entry.cpp     | 107 +++++++---
 .../source/astcenc_find_best_partitioning.cpp | 166 +++++++++------
 .../astc-encoder/source/astcenc_internal.h    | 142 ++++++-------
 .../source/astcenc_vecmathlib_common_4.h      |  17 ++
 .../source/astcenc_weight_align.cpp           | 198 ++----------------
 scripts/bimg.lua                              |   6 +
 scripts/bimg_encode.lua                       |  10 +-
 src/image.cpp                                 |  64 ++++--
 src/image_encode.cpp                          |  82 +++++---
 13 files changed, 496 insertions(+), 512 deletions(-)

diff --git a/3rdparty/astc-encoder/include/astcenc.h b/3rdparty/astc-encoder/include/astcenc.h
index 5d7af53..56f1ad8 100644
--- a/3rdparty/astc-encoder/include/astcenc.h
+++ b/3rdparty/astc-encoder/include/astcenc.h
@@ -241,6 +241,9 @@ static const float ASTCENC_PRE_MEDIUM = 60.0f;
 /** @brief The thorough quality search preset. */
 static const float ASTCENC_PRE_THOROUGH = 98.0f;
 
+/** @brief The very thorough quality search preset. */
+static const float ASTCENC_PRE_VERYTHOROUGH = 99.0f;
+
 /** @brief The exhaustive, highest quality, search preset. */
 static const float ASTCENC_PRE_EXHAUSTIVE = 100.0f;
 
@@ -440,11 +443,25 @@ struct astcenc_config
 	unsigned int tune_partition_count_limit;
 
 	/**
-	 * @brief The maximum number of partitions searched (-partitionindexlimit).
+	 * @brief The maximum number of partitionings searched for 2 partitions (-2partitionindexlimit).
 	 *
 	 * Valid values are between 1 and 1024.
 	 */
-	unsigned int tune_partition_index_limit;
+	unsigned int tune_2partition_index_limit;
+
+	/**
+	 * @brief The maximum number of partitionings searched for 3 partitions (-3partitionindexlimit).
+	 *
+	 * Valid values are between 1 and 1024.
+	 */
+	unsigned int tune_3partition_index_limit;
+
+	/**
+	 * @brief The maximum number of partitionings searched for 4 partitions (-4partitionindexlimit).
+	 *
+	 * Valid values are between 1 and 1024.
+	 */
+	unsigned int tune_4partition_index_limit;
 
 	/**
 	 * @brief The maximum centile for block modes searched (-blockmodelimit).
@@ -468,6 +485,27 @@ struct astcenc_config
 	 */
 	unsigned int tune_candidate_limit;
 
+	/**
+	 * @brief The number of trial partitionings per 2 partition search (-2partitioncandidatelimit).
+	 *
+	 * Valid values are between 1 and TUNE_MAX_PARTITIIONING_CANDIDATES.
+	 */
+	unsigned int tune_2partitioning_candidate_limit;
+
+	/**
+	 * @brief The number of trial partitionings per 3 partition search (-3partitioncandidatelimit).
+	 *
+	 * Valid values are between 1 and TUNE_MAX_PARTITIIONING_CANDIDATES.
+	 */
+	unsigned int tune_3partitioning_candidate_limit;
+
+	/**
+	 * @brief The number of trial partitionings per 4 partition search (-4partitioncandidatelimit).
+	 *
+	 * Valid values are between 1 and TUNE_MAX_PARTITIIONING_CANDIDATES.
+	 */
+	unsigned int tune_4partitioning_candidate_limit;
+
 	/**
 	 * @brief The dB threshold for stopping block search (-dblimit).
 	 *
@@ -517,11 +555,6 @@ struct astcenc_config
 	 */
 	float tune_2_plane_early_out_limit_correlation;
 
-	/**
-	 * @brief The threshold below which (inclusive) we stop testing low/high/low+high cutoffs.
-	 */
-	unsigned int tune_low_weight_count_limit;
-
 #if defined(ASTCENC_DIAGNOSTICS)
 	/**
 	 * @brief The path to save the diagnostic trace data to.
diff --git a/3rdparty/astc-encoder/source/astcenc_color_quantize.cpp b/3rdparty/astc-encoder/source/astcenc_color_quantize.cpp
index 176754f..278f334 100644
--- a/3rdparty/astc-encoder/source/astcenc_color_quantize.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_color_quantize.cpp
@@ -334,13 +334,13 @@ static bool try_quantize_rgb_delta(
 	int g0be = quant_color(quant_level, g0b);
 	int b0be = quant_color(quant_level, b0b);
 
-	r0b = unquant_color(quant_level, r0be);
-	g0b = unquant_color(quant_level, g0be);
-	b0b = unquant_color(quant_level, b0be);
+	int r0bu = unquant_color(quant_level, r0be);
+	int g0bu = unquant_color(quant_level, g0be);
+	int b0bu = unquant_color(quant_level, b0be);
 
-	r0b |= r0a & 0x100;
-	g0b |= g0a & 0x100;
-	b0b |= b0a & 0x100;
+	r0b = r0bu | (r0a & 0x100);
+	g0b = g0bu | (g0a & 0x100);
+	b0b = b0bu | (b0a & 0x100);
 
 	// Get hold of the second value
 	int r1d = astc::flt2int_rtn(r1);
@@ -386,36 +386,18 @@ static bool try_quantize_rgb_delta(
 		return false;
 	}
 
-	// Check that the sum of the encoded offsets is nonnegative, else encoding fails
-	r1du &= 0x7f;
-	g1du &= 0x7f;
-	b1du &= 0x7f;
-
-	if (r1du & 0x40)
-	{
-		r1du -= 0x80;
-	}
-
-	if (g1du & 0x40)
-	{
-		g1du -= 0x80;
-	}
-
-	if (b1du & 0x40)
-	{
-		b1du -= 0x80;
-	}
-
-	if (r1du + g1du + b1du < 0)
+	// If the sum of offsets triggers blue-contraction then encoding fails
+	vint4 ep0(r0bu, g0bu, b0bu, 0);
+	vint4 ep1(r1du, g1du, b1du, 0);
+	bit_transfer_signed(ep1, ep0);
+	if (hadd_rgb_s(ep1) < 0)
 	{
 		return false;
 	}
 
 	// Check that the offsets produce legitimate sums as well
-	r1du += r0b;
-	g1du += g0b;
-	b1du += b0b;
-	if (r1du < 0 || r1du > 0x1FF || g1du < 0 || g1du > 0x1FF || b1du < 0 || b1du > 0x1FF)
+	ep0 = ep0 + ep1;
+	if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
 	{
 		return false;
 	}
@@ -477,13 +459,13 @@ static bool try_quantize_rgb_delta_blue_contract(
 	int g0be = quant_color(quant_level, g0b);
 	int b0be = quant_color(quant_level, b0b);
 
-	r0b = unquant_color(quant_level, r0be);
-	g0b = unquant_color(quant_level, g0be);
-	b0b = unquant_color(quant_level, b0be);
+	int r0bu = unquant_color(quant_level, r0be);
+	int g0bu = unquant_color(quant_level, g0be);
+	int b0bu = unquant_color(quant_level, b0be);
 
-	r0b |= r0a & 0x100;
-	g0b |= g0a & 0x100;
-	b0b |= b0a & 0x100;
+	r0b = r0bu | (r0a & 0x100);
+	g0b = g0bu | (g0a & 0x100);
+	b0b = b0bu | (b0a & 0x100);
 
 	// Get hold of the second value
 	int r1d = astc::flt2int_rtn(r1);
@@ -530,38 +512,18 @@ static bool try_quantize_rgb_delta_blue_contract(
 		return false;
 	}
 
-	// Check that the sum of the encoded offsets is negative, else encoding fails
-	// Note that this is inverse of the test for non-blue-contracted RGB.
-	r1du &= 0x7f;
-	g1du &= 0x7f;
-	b1du &= 0x7f;
-
-	if (r1du & 0x40)
-	{
-		r1du -= 0x80;
-	}
-
-	if (g1du & 0x40)
-	{
-		g1du -= 0x80;
-	}
-
-	if (b1du & 0x40)
-	{
-		b1du -= 0x80;
-	}
-
-	if (r1du + g1du + b1du >= 0)
+	// If the sum of offsets does not trigger blue-contraction then encoding fails
+	vint4 ep0(r0bu, g0bu, b0bu, 0);
+	vint4 ep1(r1du, g1du, b1du, 0);
+	bit_transfer_signed(ep1, ep0);
+	if (hadd_rgb_s(ep1) >= 0)
 	{
 		return false;
 	}
 
 	// Check that the offsets produce legitimate sums as well
-	r1du += r0b;
-	g1du += g0b;
-	b1du += b0b;
-
-	if (r1du < 0 || r1du > 0x1FF || g1du < 0 || g1du > 0x1FF || b1du < 0 || b1du > 0x1FF)
+	ep0 = ep0 + ep1;
+	if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
 	{
 		return false;
 	}
diff --git a/3rdparty/astc-encoder/source/astcenc_color_unquantize.cpp b/3rdparty/astc-encoder/source/astcenc_color_unquantize.cpp
index c0aeebd..203615c 100644
--- a/3rdparty/astc-encoder/source/astcenc_color_unquantize.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_color_unquantize.cpp
@@ -97,15 +97,8 @@ static void rgba_delta_unpack(
 	vint4 input0 = unquant_color(quant_level, input0q);
 	vint4 input1 = unquant_color(quant_level, input1q);
 
-	// Perform bit-transfer
-	input0 = input0 | lsl<1>(input1 & 0x80);
-	input1 = input1 & 0x7F;
-	vmask4 mask = (input1 & 0x40) != vint4::zero();
-	input1 = select(input1, input1 - 0x80, mask);
-
-	// Scale
-	input0 = asr<1>(input0);
-	input1 = asr<1>(input1);
+	// Apply bit transfer
+	bit_transfer_signed(input1, input0);
 
 	// Apply blue-uncontraction if needed
 	int rgb_sum = hadd_rgb_s(input1);
diff --git a/3rdparty/astc-encoder/source/astcenc_compress_symbolic.cpp b/3rdparty/astc-encoder/source/astcenc_compress_symbolic.cpp
index 1a58b0e..68bde08 100644
--- a/3rdparty/astc-encoder/source/astcenc_compress_symbolic.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_compress_symbolic.cpp
@@ -424,11 +424,7 @@ static float compress_symbolic_block_for_partition_1plane(
 
 	// For each mode, use the angular method to compute a shift
 	compute_angular_endpoints_1plane(
-	    config.tune_low_weight_count_limit,
-	    only_always, bsd,
-	    dec_weights_ideal,
-	    max_weight_quant,
-	    tmpbuf);
+	    only_always, bsd, dec_weights_ideal, max_weight_quant, tmpbuf);
 
 	float* weight_low_value = tmpbuf.weight_low_value1;
 	float* weight_high_value = tmpbuf.weight_high_value1;
@@ -795,9 +791,7 @@ static float compress_symbolic_block_for_partition_2planes(
 	float min_wt_cutoff2 = hmin_s(select(err_max, min_ep2, err_mask));
 
 	compute_angular_endpoints_2planes(
-	    config.tune_low_weight_count_limit,
-	    bsd, dec_weights_ideal, max_weight_quant,
-	    tmpbuf);
+	    bsd, dec_weights_ideal, max_weight_quant, tmpbuf);
 
 	// For each mode (which specifies a decimation and a quantization):
 	//     * Compute number of bits needed for the quantized weights
@@ -1130,12 +1124,13 @@ static float prepare_block_statistics(
 
 	aa_var -= as * (as * rpt);
 
-	rg_cov *= astc::rsqrt(astc::max(rr_var * gg_var, 1e-30f));
-	rb_cov *= astc::rsqrt(astc::max(rr_var * bb_var, 1e-30f));
-	ra_cov *= astc::rsqrt(astc::max(rr_var * aa_var, 1e-30f));
-	gb_cov *= astc::rsqrt(astc::max(gg_var * bb_var, 1e-30f));
-	ga_cov *= astc::rsqrt(astc::max(gg_var * aa_var, 1e-30f));
-	ba_cov *= astc::rsqrt(astc::max(bb_var * aa_var, 1e-30f));
+	// These will give a NaN if a channel is constant - these are fixed up in the next step
+	rg_cov *= astc::rsqrt(rr_var * gg_var);
+	rb_cov *= astc::rsqrt(rr_var * bb_var);
+	ra_cov *= astc::rsqrt(rr_var * aa_var);
+	gb_cov *= astc::rsqrt(gg_var * bb_var);
+	ga_cov *= astc::rsqrt(gg_var * aa_var);
+	ba_cov *= astc::rsqrt(bb_var * aa_var);
 
 	if (astc::isnan(rg_cov)) rg_cov = 1.0f;
 	if (astc::isnan(rb_cov)) rb_cov = 1.0f;
@@ -1144,7 +1139,7 @@ static float prepare_block_statistics(
 	if (astc::isnan(ga_cov)) ga_cov = 1.0f;
 	if (astc::isnan(ba_cov)) ba_cov = 1.0f;
 
-	float lowest_correlation = astc::min(fabsf(rg_cov), fabsf(rb_cov));
+	float lowest_correlation = astc::min(fabsf(rg_cov),      fabsf(rb_cov));
 	lowest_correlation       = astc::min(lowest_correlation, fabsf(ra_cov));
 	lowest_correlation       = astc::min(lowest_correlation, fabsf(gb_cov));
 	lowest_correlation       = astc::min(lowest_correlation, fabsf(ga_cov));
@@ -1197,6 +1192,18 @@ void compress_block(
 	bool block_skip_two_plane = false;
 	int max_partitions = ctx.config.tune_partition_count_limit;
 
+	unsigned int requested_partition_indices[3] {
+		ctx.config.tune_2partition_index_limit,
+		ctx.config.tune_3partition_index_limit,
+		ctx.config.tune_4partition_index_limit
+	};
+
+	unsigned int requested_partition_trials[3] {
+		ctx.config.tune_2partitioning_candidate_limit,
+		ctx.config.tune_3partitioning_candidate_limit,
+		ctx.config.tune_4partitioning_candidate_limit
+	};
+
 #if defined(ASTCENC_DIAGNOSTICS)
 	// Do this early in diagnostic builds so we can dump uniform metrics
 	// for every block. Do it later in release builds to avoid redundant work!
@@ -1366,13 +1373,19 @@ void compress_block(
 	// Find best blocks for 2, 3 and 4 partitions
 	for (int partition_count = 2; partition_count <= max_partitions; partition_count++)
 	{
-		unsigned int partition_indices[2] { 0 };
+		unsigned int partition_indices[TUNE_MAX_PARTITIIONING_CANDIDATES];
 
-		find_best_partition_candidates(bsd, blk, partition_count,
-		                               ctx.config.tune_partition_index_limit,
-		                               partition_indices);
+		unsigned int requested_indices = requested_partition_indices[partition_count - 2];
 
-		for (unsigned int i = 0; i < 2; i++)
+		unsigned int requested_trials = requested_partition_trials[partition_count - 2];
+		requested_trials = astc::min(requested_trials, requested_indices);
+
+		unsigned int actual_trials = find_best_partition_candidates(
+		    bsd, blk, partition_count, requested_indices, partition_indices, requested_trials);
+
+		float best_error_in_prev = best_errorvals_for_pcount[partition_count - 2];
+
+		for (unsigned int i = 0; i < actual_trials; i++)
 		{
 			TRACE_NODE(node1, "pass");
 			trace_add_data("partition_count", partition_count);
@@ -1387,6 +1400,20 @@ void compress_block(
 			    scb, tmpbuf, quant_limit);
 
 			best_errorvals_for_pcount[partition_count - 1] = astc::min(best_errorvals_for_pcount[partition_count - 1], errorval);
+
+			// If using N partitions doesn't improve much over using N-1 partitions then skip trying
+			// N+1. Error can dramatically improve if the data is correlated or non-correlated and
+			// aligns with a partitioning that suits that encoding, so for this inner loop check add
+			// a large error scale because the "other" trial could be a lot better. In total the
+			// error must be at least 2x worse than the best existing error to early-out.
+			float best_error = best_errorvals_for_pcount[partition_count - 1];
+			float best_error_scale = exit_thresholds_for_pcount[partition_count - 1] * 2.0f;
+			if (best_error > (best_error_in_prev * best_error_scale))
+			{
+				trace_add_data("skip", "tune_partition_early_out_limit_factor");
+				goto END_OF_TESTS;
+			}
+
 			if (errorval < error_threshold)
 			{
 				trace_add_data("exit", "quality hit");
@@ -1396,7 +1423,6 @@ void compress_block(
 
 		// If using N partitions doesn't improve much over using N-1 partitions then skip trying N+1
 		float best_error = best_errorvals_for_pcount[partition_count - 1];
-		float best_error_in_prev = best_errorvals_for_pcount[partition_count - 2];
 		float best_error_scale = exit_thresholds_for_pcount[partition_count - 1];
 		if (best_error > (best_error_in_prev * best_error_scale))
 		{
diff --git a/3rdparty/astc-encoder/source/astcenc_entry.cpp b/3rdparty/astc-encoder/source/astcenc_entry.cpp
index 32403c7..6491c4e 100644
--- a/3rdparty/astc-encoder/source/astcenc_entry.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_entry.cpp
@@ -40,10 +40,15 @@ struct astcenc_preset_config
 {
 	float quality;
 	unsigned int tune_partition_count_limit;
-	unsigned int tune_partition_index_limit;
+	unsigned int tune_2partition_index_limit;
+	unsigned int tune_3partition_index_limit;
+	unsigned int tune_4partition_index_limit;
 	unsigned int tune_block_mode_limit;
 	unsigned int tune_refinement_limit;
 	unsigned int tune_candidate_limit;
+	unsigned int tune_2partitioning_candidate_limit;
+	unsigned int tune_3partitioning_candidate_limit;
+	unsigned int tune_4partitioning_candidate_limit;
 	float tune_db_limit_a_base;
 	float tune_db_limit_b_base;
 	float tune_mode0_mse_overshoot;
@@ -51,7 +56,6 @@ struct astcenc_preset_config
 	float tune_2_partition_early_out_limit_factor;
 	float tune_3_partition_early_out_limit_factor;
 	float tune_2_plane_early_out_limit_correlation;
-	unsigned int tune_low_weight_count_limit;
 };
 
 
@@ -59,22 +63,25 @@ struct astcenc_preset_config
  * @brief The static quality presets that are built-in for high bandwidth
  * presets (x < 25 texels per block).
  */
-static const std::array<astcenc_preset_config, 5> preset_configs_high {{
+static const std::array<astcenc_preset_config, 6> preset_configs_high {{
 	{
 		ASTCENC_PRE_FASTEST,
-		2, 10, 43, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 25
+		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
 	}, {
 		ASTCENC_PRE_FAST,
-		3, 14, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.65f, 20
+		3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.90f
 	}, {
 		ASTCENC_PRE_MEDIUM,
-		4, 28, 76, 3, 3, 95.0f, 70.0f, 2.5f, 2.5f, 1.2f, 1.25f, 0.85f, 16
+		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 2.5f, 1.1f, 1.05f, 0.95f
 	}, {
 		ASTCENC_PRE_THOROUGH,
-		4, 76, 93, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.95f, 12
+		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.35f, 1.15f, 0.97f
+	}, {
+		ASTCENC_PRE_VERYTHOROUGH,
+		4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
 	}, {
 		ASTCENC_PRE_EXHAUSTIVE,
-		4, 1024, 100, 4, 4, 200.0f, 200.0f, 10.0f, 10.0f, 10.0f, 10.0f, 0.99f, 0
+		4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
 	}
 }};
 
@@ -82,46 +89,51 @@ static const std::array<astcenc_preset_config, 5> preset_configs_high {{
  * @brief The static quality presets that are built-in for medium bandwidth
  * presets (25 <= x < 64 texels per block).
  */
-static const std::array<astcenc_preset_config, 5> preset_configs_mid {{
+static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
 	{
 		ASTCENC_PRE_FASTEST,
-		2, 10, 43, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
+		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.80f
 	}, {
 		ASTCENC_PRE_FAST,
-		3, 15, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
+		3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
 	}, {
 		ASTCENC_PRE_MEDIUM,
-		4, 30, 76, 3, 3, 95.0f, 70.0f, 3.0f, 3.0f, 1.2f, 1.25f, 0.75f, 14
+		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 3.0f, 1.1f, 1.05f, 0.90f
 	}, {
 		ASTCENC_PRE_THOROUGH,
-		4, 76, 93, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.95f, 10
+		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.4f, 1.2f, 0.95f
+	}, {
+		ASTCENC_PRE_VERYTHOROUGH,
+		4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
 	}, {
 		ASTCENC_PRE_EXHAUSTIVE,
-		4, 1024, 100, 4, 4, 200.0f, 200.0f, 10.0f, 10.0f, 10.0f, 10.0f, 0.99f, 0
+		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
 	}
 }};
 
-
 /**
  * @brief The static quality presets that are built-in for low bandwidth
  * presets (64 <= x texels per block).
  */
-static const std::array<astcenc_preset_config, 5> preset_configs_low {{
+static const std::array<astcenc_preset_config, 6> preset_configs_low {{
 	{
 		ASTCENC_PRE_FASTEST,
-		2, 10, 40, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
+		2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.80f
 	}, {
 		ASTCENC_PRE_FAST,
-		2, 15, 55, 3, 3, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
+		2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
 	}, {
 		ASTCENC_PRE_MEDIUM,
-		3, 30, 76, 3, 3, 95.0f, 70.0f, 3.5f, 3.5f, 1.2f, 1.25f, 0.65f, 12
+		3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 3.5f, 1.1f, 1.05f, 0.90f
 	}, {
 		ASTCENC_PRE_THOROUGH,
-		4, 75, 92, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.85f, 10
+		4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.3f, 1.2f, 0.97f
+	}, {
+		ASTCENC_PRE_VERYTHOROUGH,
+		4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
 	}, {
 		ASTCENC_PRE_EXHAUSTIVE,
-		4, 1024, 100, 4, 4, 200.0f, 200.0f, 10.0f, 10.0f, 10.0f, 10.0f, 0.99f, 0
+		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
 	}
 }};
 
@@ -422,10 +434,15 @@ static astcenc_error validate_config(
 	config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);
 
 	config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
-	config.tune_partition_index_limit = astc::clamp(config.tune_partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
+	config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
+	config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
+	config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
 	config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
 	config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);
 	config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
+	config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
+	config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
+	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
 	config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
 	config.tune_mode0_mse_overshoot = astc::max(config.tune_mode0_mse_overshoot, 1.0f);
 	config.tune_refinement_mse_overshoot = astc::max(config.tune_refinement_mse_overshoot, 1.0f);
@@ -464,9 +481,23 @@ astcenc_error astcenc_config_init(
 	astcenc_config* configp
 ) {
 	astcenc_error status;
-	astcenc_config& config = *configp;
+
+	// Check basic library compatibility options here so they are checked early. Note, these checks
+	// are repeated in context_alloc for cases where callers use a manually defined config struct
+	status = validate_cpu_isa();
+	if (status != ASTCENC_SUCCESS)
+	{
+		return status;
+	}
+
+	status = validate_cpu_float();
+	if (status != ASTCENC_SUCCESS)
+	{
+		return status;
+	}
 
 	// Zero init all config fields; although most of will be over written
+	astcenc_config& config = *configp;
 	std::memset(&config, 0, sizeof(config));
 
 	// Process the block size
@@ -493,7 +524,7 @@ astcenc_error astcenc_config_init(
 		return ASTCENC_ERR_BAD_QUALITY;
 	}
 
-	static const std::array<astcenc_preset_config, 5>* preset_configs;
+	static const std::array<astcenc_preset_config, 6>* preset_configs;
 	int texels_int = block_x * block_y * block_z;
 	if (texels_int < 25)
 	{
@@ -525,11 +556,15 @@ astcenc_error astcenc_config_init(
 	if (start == end)
 	{
 		config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
-		config.tune_partition_index_limit = (*preset_configs)[start].tune_partition_index_limit;
+		config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit;
+		config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit;
+		config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
 		config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
 		config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
-		config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit,
-		                                        TUNE_MAX_TRIAL_CANDIDATES);
+		config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit, TUNE_MAX_TRIAL_CANDIDATES);
+		config.tune_2partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_2partitioning_candidate_limit, TUNE_MAX_PARTITIIONING_CANDIDATES);
+		config.tune_3partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_3partitioning_candidate_limit, TUNE_MAX_PARTITIIONING_CANDIDATES);
+		config.tune_4partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_4partitioning_candidate_limit, TUNE_MAX_PARTITIIONING_CANDIDATES);
 		config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
 		                                 (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
 
@@ -539,7 +574,6 @@ astcenc_error astcenc_config_init(
 		config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor;
 		config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor;
 		config.tune_2_plane_early_out_limit_correlation = (*preset_configs)[start].tune_2_plane_early_out_limit_correlation;
-		config.tune_low_weight_count_limit = (*preset_configs)[start].tune_low_weight_count_limit;
 	}
 	// Start and end node are not the same - so interpolate between them
 	else
@@ -561,11 +595,19 @@ astcenc_error astcenc_config_init(
 		#define LERPUI(param) static_cast<unsigned int>(LERPI(param))
 
 		config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
-		config.tune_partition_index_limit = LERPI(tune_partition_index_limit);
+		config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit);
+		config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit);
+		config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
 		config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
 		config.tune_refinement_limit = LERPI(tune_refinement_limit);
 		config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit),
 		                                        TUNE_MAX_TRIAL_CANDIDATES);
+		config.tune_2partitioning_candidate_limit = astc::min(LERPUI(tune_2partitioning_candidate_limit),
+		                                                      BLOCK_MAX_PARTITIONINGS);
+		config.tune_3partitioning_candidate_limit = astc::min(LERPUI(tune_3partitioning_candidate_limit),
+		                                                      BLOCK_MAX_PARTITIONINGS);
+		config.tune_4partitioning_candidate_limit = astc::min(LERPUI(tune_4partitioning_candidate_limit),
+		                                                      BLOCK_MAX_PARTITIONINGS);
 		config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
 		                                 LERP(tune_db_limit_b_base) - 19 * ltexels);
 
@@ -575,7 +617,6 @@ astcenc_error astcenc_config_init(
 		config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor);
 		config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor);
 		config.tune_2_plane_early_out_limit_correlation = LERP(tune_2_plane_early_out_limit_correlation);
-		config.tune_low_weight_count_limit = LERPI(tune_low_weight_count_limit);
 		#undef LERP
 		#undef LERPI
 		#undef LERPUI
@@ -676,13 +717,13 @@ astcenc_error astcenc_context_alloc(
 	astcenc_error status;
 	const astcenc_config& config = *configp;
 
-	status = validate_cpu_float();
+	status = validate_cpu_isa();
 	if (status != ASTCENC_SUCCESS)
 	{
 		return status;
 	}
 
-	status = validate_cpu_isa();
+	status = validate_cpu_float();
 	if (status != ASTCENC_SUCCESS)
 	{
 		return status;
@@ -714,7 +755,7 @@ astcenc_error astcenc_context_alloc(
 	status = validate_config(ctx->config);
 	if (status != ASTCENC_SUCCESS)
 	{
-		delete ctx;
+		delete ctxo;
 		return status;
 	}
 
diff --git a/3rdparty/astc-encoder/source/astcenc_find_best_partitioning.cpp b/3rdparty/astc-encoder/source/astcenc_find_best_partitioning.cpp
index 2ea3e43..c9e1835 100644
--- a/3rdparty/astc-encoder/source/astcenc_find_best_partitioning.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_find_best_partitioning.cpp
@@ -485,13 +485,59 @@ static unsigned int compute_kmeans_partition_ordering(
 	    mismatch_counts, partition_ordering);
 }
 
+/**
+ * @brief Insert a partitioning into an ordered list of results, sorted by ascending error.
+ *
+ * @param         max_values      The number of entries in the best result arrays; must be >= 1.
+ * @param         this_error      The error of the new entry.
+ * @param         this_partition  The partition ID of the new entry.
+ * @param[in,out] best_errors     The array of best error values, lowest error first.
+ * @param[in,out] best_partitions The array of partition IDs matching @c best_errors.
+ */
+static void insert_result(
+	unsigned int max_values,
+	float this_error,
+	unsigned int this_partition,
+	float* best_errors,
+	unsigned int* best_partitions)
+{
+	// Early out if the new error is no better than the worst entry currently stored
+	if (this_error >= best_errors[max_values - 1])
+	{
+		return;
+	}
+
+	// Otherwise insert into the list, keeping it in ascending error order
+	for (unsigned int i = 0; i < max_values;  i++)
+	{
+		// Existing entry has a lower error - keep scanning ...
+		if (this_error > best_errors[i])
+		{
+			continue;
+		}
+
+		// Shift this and all later entries down one slot; the last entry is dropped
+		for (unsigned int j = max_values - 1; j > i; j--)
+		{
+			best_errors[j] = best_errors[j - 1];
+			best_partitions[j] = best_partitions[j - 1];
+		}
+
+		// Store the new result in the slot that was just freed
+		best_errors[i] = this_error;
+		best_partitions[i] = this_partition;
+		break;
+	}
+}
+
 /* See header for documentation. */
-void find_best_partition_candidates(
+unsigned int find_best_partition_candidates(
 	const block_size_descriptor& bsd,
 	const image_block& blk,
 	unsigned int partition_count,
 	unsigned int partition_search_limit,
-	unsigned int best_partitions[2]
+	unsigned int best_partitions[BLOCK_MAX_PARTITIONINGS],
+	unsigned int requested_candidates
 ) {
 	// Constant used to estimate quantization error for a given partitioning; the optimal value for
 	// this depends on bitrate. These values have been determined empirically.
@@ -518,17 +564,23 @@ void find_best_partition_candidates(
 	unsigned int partition_sequence[BLOCK_MAX_PARTITIONINGS];
 	unsigned int sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence);
 	partition_search_limit = astc::min(partition_search_limit, sequence_len);
+	requested_candidates = astc::min(partition_search_limit, requested_candidates);
 
 	bool uses_alpha = !blk.is_constant_channel(3);
 
 	// Partitioning errors assuming uncorrelated-chrominance endpoints
-	float uncor_best_error { ERROR_CALC_DEFAULT };
-	unsigned int uncor_best_partition { 0 };
+	float uncor_best_errors[TUNE_MAX_PARTITIIONING_CANDIDATES];
+	unsigned int uncor_best_partitions[TUNE_MAX_PARTITIIONING_CANDIDATES];
 
 	// Partitioning errors assuming same-chrominance endpoints
-	// Store two so we can always return one different to uncorr
-	float samec_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT };
-	unsigned int samec_best_partitions[2] { 0, 0 };
+	float samec_best_errors[TUNE_MAX_PARTITIIONING_CANDIDATES];
+	unsigned int samec_best_partitions[TUNE_MAX_PARTITIIONING_CANDIDATES];
+
+	for (unsigned int i = 0; i < requested_candidates; i++)
+	{
+		uncor_best_errors[i] = ERROR_CALC_DEFAULT;
+		samec_best_errors[i] = ERROR_CALC_DEFAULT;
+	}
 
 	if (uses_alpha)
 	{
@@ -602,25 +654,8 @@ void find_best_partition_candidates(
 				samec_error += dot_s(samec_vector * samec_vector, error_weights);
 			}
 
-			if (uncor_error < uncor_best_error)
-			{
-				uncor_best_error = uncor_error;
-				uncor_best_partition = partition;
-			}
-
-			if (samec_error < samec_best_errors[0])
-			{
-				samec_best_errors[1] = samec_best_errors[0];
-				samec_best_partitions[1] = samec_best_partitions[0];
-
-				samec_best_errors[0] = samec_error;
-				samec_best_partitions[0] = partition;
-			}
-			else if (samec_error < samec_best_errors[1])
-			{
-				samec_best_errors[1] = samec_error;
-				samec_best_partitions[1] = partition;
-			}
+			insert_result(requested_candidates, uncor_error, partition, uncor_best_errors, uncor_best_partitions);
+			insert_result(requested_candidates, samec_error, partition, samec_best_errors, samec_best_partitions);
 		}
 	}
 	else
@@ -687,50 +722,55 @@ void find_best_partition_candidates(
 				samec_error += dot3_s(samec_vector * samec_vector, error_weights);
 			}
 
-			if (uncor_error < uncor_best_error)
-			{
-				uncor_best_error = uncor_error;
-				uncor_best_partition = partition;
-			}
+			insert_result(requested_candidates, uncor_error, partition, uncor_best_errors, uncor_best_partitions);
+			insert_result(requested_candidates, samec_error, partition, samec_best_errors, samec_best_partitions);
+		}
+	}
 
-			if (samec_error < samec_best_errors[0])
-			{
-				samec_best_errors[1] = samec_best_errors[0];
-				samec_best_partitions[1] = samec_best_partitions[0];
+	bool best_is_uncor = uncor_best_partitions[0] > samec_best_partitions[0];
 
-				samec_best_errors[0] = samec_error;
-				samec_best_partitions[0] = partition;
-			}
-			else if (samec_error < samec_best_errors[1])
+	unsigned int interleave[2 * TUNE_MAX_PARTITIIONING_CANDIDATES];
+	for (unsigned int i = 0; i < requested_candidates; i++)
+	{
+		if (best_is_uncor)
+		{
+			interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
+			interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
+		}
+		else
+		{
+			interleave[2 * i] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index;
+			interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index;
+		}
+	}
+
+	uint64_t bitmasks[1024/64] { 0 };
+	unsigned int emitted = 0;
+
+	// Deduplicate the first "requested" entries
+	for (unsigned int i = 0; i < requested_candidates * 2;  i++)
+	{
+		unsigned int partition = interleave[i];
+
+		unsigned int word = partition / 64;
+		unsigned int bit = partition % 64;
+
+		bool written = bitmasks[word] & (1ull << bit);
+
+		if (!written)
+		{
+			best_partitions[emitted] = partition;
+			bitmasks[word] |= 1ull << bit;
+			emitted++;
+
+			if (emitted == requested_candidates)
 			{
-				samec_best_errors[1] = samec_error;
-				samec_best_partitions[1] = partition;
+				break;
 			}
 		}
 	}
 
-	// Same partition is best for both, so use this first unconditionally
-	if (uncor_best_partition == samec_best_partitions[0])
-	{
-		best_partitions[0] = samec_best_partitions[0];
-		best_partitions[1] = samec_best_partitions[1];
-	}
-	// Uncor is best
-	else if (uncor_best_error <= samec_best_errors[0])
-	{
-		best_partitions[0] = uncor_best_partition;
-		best_partitions[1] = samec_best_partitions[0];
-	}
-	// Samec is best
-	else
-	{
-		best_partitions[0] = samec_best_partitions[0];
-		best_partitions[1] = uncor_best_partition;
-	}
-
-	// Convert these back into canonical partition IDs for the rest of the codec
-	best_partitions[0] = bsd.get_raw_partition_info(partition_count, best_partitions[0]).partition_index;
-	best_partitions[1] = bsd.get_raw_partition_info(partition_count, best_partitions[1]).partition_index;
+	return emitted;
 }
 
 #endif
diff --git a/3rdparty/astc-encoder/source/astcenc_internal.h b/3rdparty/astc-encoder/source/astcenc_internal.h
index 33b12eb..6ec395f 100644
--- a/3rdparty/astc-encoder/source/astcenc_internal.h
+++ b/3rdparty/astc-encoder/source/astcenc_internal.h
@@ -130,7 +130,14 @@ static constexpr unsigned int TUNE_MIN_TEXELS_MODE0_FASTPATH { 24 };
  *
  * This can be dynamically reduced by the compression quality preset.
  */
-static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 4 };
+static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 };
+
+/**
+ * @brief The maximum number of candidate partitionings tested for each encoding mode.
+ *
+ * This can be dynamically reduced by the compression quality preset.
+ */
+static constexpr unsigned int TUNE_MAX_PARTITIIONING_CANDIDATES { 32 };
 
 /**
  * @brief The maximum quant level using full angular endpoint search method.
@@ -1345,11 +1352,11 @@ extern const int8_t quant_mode_table[10][128];
  * Note that BISE can return strings that are not a whole number of bytes in length, and ASTC can
  * start storing strings in a block at arbitrary bit offsets in the encoded data.
  *
- * @param         quant_level      The BISE alphabet size.
- * @param         character_count  The number of characters in the string.
- * @param         input_data       The unpacked string, one byte per character.
- * @param[in,out] output_data      The output packed string.
- * @param         bit_offset       The starting offset in the output storage.
+ * @param         quant_level       The BISE alphabet size.
+ * @param         character_count   The number of characters in the string.
+ * @param         input_data        The unpacked string, one byte per character.
+ * @param[in,out] output_data       The output packed string.
+ * @param         bit_offset        The starting offset in the output storage.
  */
 void encode_ise(
 	quant_method quant_level,
@@ -1436,11 +1443,11 @@ void compute_avgs_and_dirs_3_comp(
  * This is a specialization of @c compute_avgs_and_dirs_3_comp where the omitted component is
  * always alpha, a common case during partition search.
  *
- * @param      pi                  The partition info for the current trial.
- * @param      blk                 The image block color data to be compressed.
- * @param[out] pm                  The output partition metrics.
- *                                 - Only pi.partition_count array entries actually get initialized.
- *                                 - Direction vectors @c pm.dir are not normalized.
+ * @param      pi    The partition info for the current trial.
+ * @param      blk   The image block color data to be compressed.
+ * @param[out] pm    The output partition metrics.
+ *                   - Only pi.partition_count array entries actually get initialized.
+ *                   - Direction vectors @c pm.dir are not normalized.
  */
 void compute_avgs_and_dirs_3_comp_rgb(
 	const partition_info& pi,
@@ -1471,11 +1478,11 @@ void compute_avgs_and_dirs_4_comp(
  *
  * This function computes the squared error when using these two representations.
  *
- * @param         pi              The partition info for the current trial.
- * @param         blk             The image block color data to be compressed.
- * @param[in,out] plines          Processed line inputs, and line length outputs.
- * @param[out]    uncor_error     The cumulative error for using the uncorrelated line.
- * @param[out]    samec_error     The cumulative error for using the same chroma line.
+ * @param         pi            The partition info for the current trial.
+ * @param         blk           The image block color data to be compressed.
+ * @param[in,out] plines        Processed line inputs, and line length outputs.
+ * @param[out]    uncor_error   The cumulative error for using the uncorrelated line.
+ * @param[out]    samec_error   The cumulative error for using the same chroma line.
  */
 void compute_error_squared_rgb(
 	const partition_info& pi,
@@ -1520,18 +1527,23 @@ void compute_error_squared_rgba(
  * candidates; one assuming data has uncorrelated chroma and one assuming the
  * data has correlated chroma. The best candidate is returned first in the list.
  *
- * @param      bsd                        The block size information.
- * @param      blk                        The image block color data to compress.
- * @param      partition_count            The number of partitions in the block.
- * @param      partition_search_limit     The number of candidate partition encodings to trial.
- * @param[out] best_partitions            The best partition candidates.
+ * @param      bsd                      The block size information.
+ * @param      blk                      The image block color data to compress.
+ * @param      partition_count          The number of partitions in the block.
+ * @param      partition_search_limit   The number of candidate partition encodings to trial.
+ * @param[out] best_partitions          The best partition candidates.
+ * @param      requested_candidates     The number of requested partitionings. May return fewer if
+ *                                      candidates are not available.
+ *
+ * @return The actual number of candidates returned.
  */
-void find_best_partition_candidates(
+unsigned int find_best_partition_candidates(
 	const block_size_descriptor& bsd,
 	const image_block& blk,
 	unsigned int partition_count,
 	unsigned int partition_search_limit,
-	unsigned int best_partitions[2]);
+	unsigned int best_partitions[TUNE_MAX_PARTITIIONING_CANDIDATES],
+	unsigned int requested_candidates);
 
 /* ============================================================================
   Functionality for managing images and image related data.
@@ -1545,10 +1557,10 @@ void find_best_partition_candidates(
  *
  * Results are written back into @c img->input_alpha_averages.
  *
- * @param      img                     The input image data, also holds output data.
- * @param      alpha_kernel_radius     The kernel radius (in pixels) for alpha mods.
- * @param      swz                     Input data component swizzle.
- * @param[out] ag                      The average variance arguments to init.
+ * @param      img                   The input image data, also holds output data.
+ * @param      alpha_kernel_radius   The kernel radius (in pixels) for alpha mods.
+ * @param      swz                   Input data component swizzle.
+ * @param[out] ag                    The average variance arguments to init.
  *
  * @return The number of tasks in the processing stage.
  */
@@ -1766,13 +1778,13 @@ float compute_error_of_weight_set_2planes(
  * The user requests a base color endpoint mode in @c format, but the quantizer may choose a
  * delta-based representation. It will report back the format variant it actually used.
  *
- * @param      color0       The input unquantized color0 endpoint for absolute endpoint pairs.
- * @param      color1       The input unquantized color1 endpoint for absolute endpoint pairs.
- * @param      rgbs_color   The input unquantized RGBS variant endpoint for same chroma endpoints.
- * @param      rgbo_color   The input unquantized RGBS variant endpoint for HDR endpoints..
- * @param      format       The desired base format.
- * @param[out] output       The output storage for the quantized colors/
- * @param      quant_level  The quantization level requested.
+ * @param      color0        The input unquantized color0 endpoint for absolute endpoint pairs.
+ * @param      color1        The input unquantized color1 endpoint for absolute endpoint pairs.
+ * @param      rgbs_color    The input unquantized RGBS variant endpoint for same chroma endpoints.
+ * @param      rgbo_color    The input unquantized RGBO variant endpoint for HDR endpoints.
+ * @param      format        The desired base format.
+ * @param[out] output        The output storage for the quantized colors.
+ * @param      quant_level   The quantization level requested.
  *
  * @return The actual endpoint mode used.
  */
@@ -1873,13 +1885,13 @@ unsigned int compute_ideal_endpoint_formats(
  * As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must
  * recompute the ideal colors for a specific weight set.
  *
- * @param         blk                        The image block color data to compress.
- * @param         pi                         The partition info for the current trial.
- * @param         di                         The weight grid decimation table.
+ * @param         blk                  The image block color data to compress.
+ * @param         pi                   The partition info for the current trial.
+ * @param         di                   The weight grid decimation table.
  * @param         dec_weights_uquant   The quantized weight set.
- * @param[in,out] ep                         The color endpoints (modifed in place).
- * @param[out]    rgbs_vectors               The RGB+scale vectors for LDR blocks.
- * @param[out]    rgbo_vectors               The RGB+offset vectors for HDR blocks.
+ * @param[in,out] ep                   The color endpoints (modified in place).
+ * @param[out]    rgbs_vectors         The RGB+scale vectors for LDR blocks.
+ * @param[out]    rgbo_vectors         The RGB+offset vectors for HDR blocks.
  */
 void recompute_ideal_colors_1plane(
 	const image_block& blk,
@@ -1896,15 +1908,15 @@ void recompute_ideal_colors_1plane(
  * As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must
  * recompute the ideal colors for a specific weight set.
  *
- * @param         blk                               The image block color data to compress.
- * @param         bsd                               The block_size descriptor.
- * @param         di                                The weight grid decimation table.
+ * @param         blk                         The image block color data to compress.
+ * @param         bsd                         The block_size descriptor.
+ * @param         di                          The weight grid decimation table.
  * @param         dec_weights_uquant_plane1   The quantized weight set for plane 1.
  * @param         dec_weights_uquant_plane2   The quantized weight set for plane 2.
- * @param[in,out] ep                                The color endpoints (modifed in place).
- * @param[out]    rgbs_vector                       The RGB+scale color for LDR blocks.
- * @param[out]    rgbo_vector                       The RGB+offset color for HDR blocks.
- * @param         plane2_component                  The component assigned to plane 2.
+ * @param[in,out] ep                          The color endpoints (modified in place).
+ * @param[out]    rgbs_vector                 The RGB+scale color for LDR blocks.
+ * @param[out]    rgbo_vector                 The RGB+offset color for HDR blocks.
+ * @param         plane2_component            The component assigned to plane 2.
  */
 void recompute_ideal_colors_2planes(
 	const image_block& blk,
@@ -1925,15 +1937,13 @@ void prepare_angular_tables();
 /**
  * @brief Compute the angular endpoints for one plane for each block mode.
  *
- * @param      tune_low_weight_limit     Weight count cutoff below which we use simpler searches.
- * @param      only_always               Only consider block modes that are always enabled.
- * @param      bsd                       The block size descriptor for the current trial.
- * @param      dec_weight_ideal_value    The ideal decimated unquantized weight values.
- * @param      max_weight_quant          The maximum block mode weight quantization allowed.
- * @param[out] tmpbuf                    Preallocated scratch buffers for the compressor.
+ * @param      only_always              Only consider block modes that are always enabled.
+ * @param      bsd                      The block size descriptor for the current trial.
+ * @param      dec_weight_ideal_value   The ideal decimated unquantized weight values.
+ * @param      max_weight_quant         The maximum block mode weight quantization allowed.
+ * @param[out] tmpbuf                   Preallocated scratch buffers for the compressor.
  */
 void compute_angular_endpoints_1plane(
-	unsigned int tune_low_weight_limit,
 	bool only_always,
 	const block_size_descriptor& bsd,
 	const float* dec_weight_ideal_value,
@@ -1943,14 +1953,12 @@ void compute_angular_endpoints_1plane(
 /**
  * @brief Compute the angular endpoints for two planes for each block mode.
  *
- * @param      tune_low_weight_limit     Weight count cutoff below which we use simpler searches.
- * @param      bsd                       The block size descriptor for the current trial.
- * @param      dec_weight_ideal_value    The ideal decimated unquantized weight values.
- * @param      max_weight_quant          The maximum block mode weight quantization allowed.
- * @param[out] tmpbuf                    Preallocated scratch buffers for the compressor.
+ * @param      bsd                      The block size descriptor for the current trial.
+ * @param      dec_weight_ideal_value   The ideal decimated unquantized weight values.
+ * @param      max_weight_quant         The maximum block mode weight quantization allowed.
+ * @param[out] tmpbuf                   Preallocated scratch buffers for the compressor.
  */
 void compute_angular_endpoints_2planes(
-	unsigned int tune_low_weight_limit,
 	const block_size_descriptor& bsd,
 	const float* dec_weight_ideal_value,
 	unsigned int max_weight_quant,
@@ -2162,18 +2170,4 @@ void aligned_free(T* ptr)
 #endif
 }
 
-static inline void dump_weights(const char* label, uint8_t* weights, int weight_count)
-{
-	printf("%s\n", label);
-	vint lane = vint::lane_id();
-	for (int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
-	{
-		vmask mask = lane < vint(weight_count);
-		vint val(weights + i);
-		val = select(vint::zero(), val, mask);
-		print(val);
-		lane += vint(ASTCENC_SIMD_WIDTH);
-	}
-}
-
 #endif
diff --git a/3rdparty/astc-encoder/source/astcenc_vecmathlib_common_4.h b/3rdparty/astc-encoder/source/astcenc_vecmathlib_common_4.h
index 2609c8f..86ee4fd 100644
--- a/3rdparty/astc-encoder/source/astcenc_vecmathlib_common_4.h
+++ b/3rdparty/astc-encoder/source/astcenc_vecmathlib_common_4.h
@@ -361,6 +361,23 @@ static inline int popcount(uint64_t v)
 
 #endif
 
+/**
+ * @brief Apply signed bit transfer, moving bit 7 (0x80) of @c input0 into @c input1.
+ *
+ * @param[in,out] input0   The first encoded endpoint; becomes lsr<1>(input0) & 0x3F, then adjusted by -0x40 based on bit 0x20.
+ * @param[in,out] input1   The second encoded endpoint; becomes lsr<1>(input1) with bit 0x80 of the original @c input0 OR'd in.
+ */
+static ASTCENC_SIMD_INLINE void bit_transfer_signed(
+	vint4& input0,
+	vint4& input1
+) {
+	input1 = lsr<1>(input1) | (input0 & 0x80); // Must read input0 before it is overwritten on the next line
+	input0 = lsr<1>(input0) & 0x3F;
+
+	vmask4 mask = (input0 & 0x20) != vint4::zero(); // Lanes whose 6-bit value has bit 5 set
+	input0 = select(input0, input0 - 0x40, mask); // Presumably picks (input0 - 0x40) in masked lanes - confirm select() order
+}
+
 /**
  * @brief Debug function to print a vector of ints.
  */
diff --git a/3rdparty/astc-encoder/source/astcenc_weight_align.cpp b/3rdparty/astc-encoder/source/astcenc_weight_align.cpp
index f066cb7..e40a318 100644
--- a/3rdparty/astc-encoder/source/astcenc_weight_align.cpp
+++ b/3rdparty/astc-encoder/source/astcenc_weight_align.cpp
@@ -333,156 +333,8 @@ static void compute_angular_endpoints_for_quant_levels(
 	}
 }
 
-/**
- * @brief For a given step size compute the lowest and highest weight, variant for low weight count.
- *
- * Compute the lowest and highest weight that results from quantizing using the given stepsize and
- * offset, and then compute the resulting error. The cut errors indicate the error that results from
- * forcing samples that should have had one weight value one step up or down.
- *
- * @param      weight_count              The number of (decimated) weights.
- * @param      dec_weight_quant_uvalue   The decimated and quantized weight values.
- * @param      max_angular_steps         The maximum number of steps to be tested.
- * @param      max_quant_steps           The maximum quantization level to be tested.
- * @param      offsets                   The angular offsets array.
- * @param[out] lowest_weight             Per angular step, the lowest weight.
- * @param[out] weight_span               Per angular step, the span between lowest and highest weight.
- * @param[out] error                     Per angular step, the error.
- */
-static void compute_lowest_and_highest_weight_lwc(
-	unsigned int weight_count,
-	const float* dec_weight_quant_uvalue,
-	unsigned int max_angular_steps,
-	unsigned int max_quant_steps,
-	const float* offsets,
-	float* lowest_weight,
-	int* weight_span,
-	float* error
-) {
-	promise(weight_count > 0);
-	promise(max_angular_steps > 0);
-
-	vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f);
-
-	// Arrays are ANGULAR_STEPS long, so always safe to run full vectors
-	for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH)
-	{
-		vfloat minidx(128.0f);
-		vfloat maxidx(-128.0f);
-		vfloat errval = vfloat::zero();
-		vfloat offset = loada(offsets + sp);
-
-		for (unsigned int j = 0; j < weight_count; j++)
-		{
-			vfloat sval = load1(dec_weight_quant_uvalue + j) * rcp_stepsize - offset;
-			vfloat svalrte = round(sval);
-			vfloat diff = sval - svalrte;
-			errval += diff * diff;
-
-			// Compute min and max quantized weight spans for each step
-			minidx = min(minidx, svalrte);
-			maxidx = max(maxidx, svalrte);
-		}
-
-		// Write out min weight and weight span; clamp span to a usable range
-		vint span = float_to_int(maxidx - minidx + vfloat(1.0f));
-		span = min(span, vint(max_quant_steps + 3));
-		span = max(span, vint(2));
-		storea(minidx, lowest_weight + sp);
-		storea(span, weight_span + sp);
-
-		vfloat ssize = 1.0f / rcp_stepsize;
-		vfloat errscale = ssize * ssize;
-		storea(errval * errscale, error + sp);
-
-		rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH);
-	}
-}
-
-/**
- * @brief The main function for the angular algorithm, variant for low weight count.
- *
- * @param      weight_count              The number of (decimated) weights.
- * @param      dec_weight_ideal_value    The ideal decimated unquantized weight values.
- * @param      max_quant_level           The maximum quantization level to be tested.
- * @param[out] low_value                 Per angular step, the lowest weight value.
- * @param[out] high_value                Per angular step, the highest weight value.
- */
-static void compute_angular_endpoints_for_quant_levels_lwc(
-	unsigned int weight_count,
-	const float* dec_weight_ideal_value,
-	unsigned int max_quant_level,
-	float low_value[TUNE_MAX_ANGULAR_QUANT + 1],
-	float high_value[TUNE_MAX_ANGULAR_QUANT + 1]
-) {
-	unsigned int max_quant_steps = steps_for_quant_level[max_quant_level];
-	unsigned int max_angular_steps = steps_for_quant_level[max_quant_level];
-
-	alignas(ASTCENC_VECALIGN) float angular_offsets[ANGULAR_STEPS];
-	alignas(ASTCENC_VECALIGN) float lowest_weight[ANGULAR_STEPS];
-	alignas(ASTCENC_VECALIGN) int32_t weight_span[ANGULAR_STEPS];
-	alignas(ASTCENC_VECALIGN) float error[ANGULAR_STEPS];
-
-	compute_angular_offsets(weight_count, dec_weight_ideal_value,
-	                        max_angular_steps, angular_offsets);
-
-
-	compute_lowest_and_highest_weight_lwc(weight_count, dec_weight_ideal_value,
-	                                      max_angular_steps, max_quant_steps,
-	                                      angular_offsets, lowest_weight, weight_span, error);
-
-	// For each quantization level, find the best error terms. Use packed vectors so data-dependent
-	// branches can become selects. This involves some integer to float casts, but the values are
-	// small enough so they never round the wrong way.
-	vfloat4 best_results[36];
-
-	// Initialize the array to some safe defaults
-	promise(max_quant_steps > 0);
-	for (unsigned int i = 0; i < (max_quant_steps + 4); i++)
-	{
-		best_results[i] = vfloat4(ERROR_CALC_DEFAULT, -1.0f, 0.0f, 0.0f);
-	}
-
-	promise(max_angular_steps > 0);
-	for (unsigned int i = 0; i < max_angular_steps; i++)
-	{
-		int idx_span = weight_span[i];
-
-		// Check best error against record N
-		vfloat4 current_best = best_results[idx_span];
-		vfloat4 candidate = vfloat4(error[i], static_cast<float>(i), 0.0f, 0.0f);
-		vmask4 mask = vfloat4(current_best.lane<0>()) > vfloat4(error[i]);
-		best_results[idx_span] = select(current_best, candidate, mask);
-	}
-
-	for (unsigned int i = 0; i <= max_quant_level; i++)
-	{
-		unsigned int q = steps_for_quant_level[i];
-		int bsi = static_cast<int>(best_results[q].lane<1>());
-
-		// Did we find anything?
-#if defined(ASTCENC_DIAGNOSTICS)
-		if ((bsi < 0) && print_once)
-		{
-			print_once = false;
-			printf("INFO: Unable to find low weight encoding within search error limit.\n\n");
-		}
-#endif
-
-		bsi = astc::max(0, bsi);
-
-		float lwi = lowest_weight[bsi];
-		float hwi = lwi + static_cast<float>(q) - 1.0f;
-
-		float stepsize = 1.0f / (1.0f + static_cast<float>(bsi));
-		low_value[i]  = (angular_offsets[bsi] + lwi) * stepsize;
-		high_value[i] = (angular_offsets[bsi] + hwi) * stepsize;
-	}
-}
-
 /* See header for documentation. */
 void compute_angular_endpoints_1plane(
-	unsigned int tune_low_weight_limit,
 	bool only_always,
 	const block_size_descriptor& bsd,
 	const float* dec_weight_ideal_value,
@@ -519,20 +371,10 @@ void compute_angular_endpoints_1plane(
 			max_precision = max_weight_quant;
 		}
 
-		if (weight_count < tune_low_weight_limit)
-		{
-			compute_angular_endpoints_for_quant_levels_lwc(
-				weight_count,
-				dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
-				max_precision, low_values[i], high_values[i]);
-		}
-		else
-		{
-			compute_angular_endpoints_for_quant_levels(
-				weight_count,
-				dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
-				max_precision, low_values[i], high_values[i]);
-		}
+		compute_angular_endpoints_for_quant_levels(
+		    weight_count,
+		    dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
+		    max_precision, low_values[i], high_values[i]);
 	}
 
 	unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always
@@ -561,7 +403,6 @@ void compute_angular_endpoints_1plane(
 
 /* See header for documentation. */
 void compute_angular_endpoints_2planes(
-	unsigned int tune_low_weight_limit,
 	const block_size_descriptor& bsd,
 	const float* dec_weight_ideal_value,
 	unsigned int max_weight_quant,
@@ -599,30 +440,15 @@ void compute_angular_endpoints_2planes(
 			max_precision = max_weight_quant;
 		}
 
-		if (weight_count < tune_low_weight_limit)
-		{
-			compute_angular_endpoints_for_quant_levels_lwc(
-				weight_count,
-				dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
-				max_precision, low_values1[i], high_values1[i]);
+		compute_angular_endpoints_for_quant_levels(
+		    weight_count,
+		    dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
+		    max_precision, low_values1[i], high_values1[i]);
 
-			compute_angular_endpoints_for_quant_levels_lwc(
-				weight_count,
-				dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS + WEIGHTS_PLANE2_OFFSET,
-				max_precision, low_values2[i], high_values2[i]);
-		}
-		else
-		{
-			compute_angular_endpoints_for_quant_levels(
-				weight_count,
-				dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS,
-				max_precision, low_values1[i], high_values1[i]);
-
-			compute_angular_endpoints_for_quant_levels(
-				weight_count,
-				dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS + WEIGHTS_PLANE2_OFFSET,
-				max_precision, low_values2[i], high_values2[i]);
-		}
+		compute_angular_endpoints_for_quant_levels(
+		    weight_count,
+		    dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS + WEIGHTS_PLANE2_OFFSET,
+		    max_precision, low_values2[i], high_values2[i]);
 	}
 
 	unsigned int start = bsd.block_mode_count_1plane_selected;
diff --git a/scripts/bimg.lua b/scripts/bimg.lua
index 3b2dc99..0dde12d 100644
--- a/scripts/bimg.lua
+++ b/scripts/bimg.lua
@@ -25,6 +25,12 @@ project "bimg"
 
 	using_bx()
 
+	configuration {}
+
+	removeflags {
+		"FloatFast", -- astc-encoder doesn't work with it.
+	}
+
 	configuration { "linux-*" }
 		buildoptions {
 			"-fPIC",
diff --git a/scripts/bimg_encode.lua b/scripts/bimg_encode.lua
index 1063523..6bc48d3 100644
--- a/scripts/bimg_encode.lua
+++ b/scripts/bimg_encode.lua
@@ -6,10 +6,6 @@
 project "bimg_encode"
 	kind "StaticLib"
 
-	removeflags {
-		"FloatFast", -- astc-encoder doesn't work with it.
-	}
-
 	includedirs {
 		path.join(BIMG_DIR, "include"),
 		path.join(BIMG_DIR, "3rdparty"),
@@ -42,6 +38,12 @@ project "bimg_encode"
 
 	using_bx()
 
+	configuration {}
+
+	removeflags {
+		"FloatFast", -- astc-encoder doesn't work with it.
+	}
+
 	configuration { "linux-*" }
 		buildoptions {
 			"-fPIC",
diff --git a/src/image.cpp b/src/image.cpp
index dc6e0be..63d4625 100644
--- a/src/image.cpp
+++ b/src/image.cpp
@@ -147,7 +147,7 @@ namespace bimg
 		"ATCE",       // ATCE
 		"ATCI",       // ATCI
 		"ASTC4x4",    // ASTC4x4
-		"ASTC5x4",	  // ASTC5x4
+		"ASTC5x4",    // ASTC5x4
 		"ASTC5x5",    // ASTC5x5
 		"ASTC6x5",    // ASTC6x5
 		"ASTC6x6",    // ASTC6x6
@@ -3851,7 +3851,7 @@ namespace bimg
 #define KTX_ATC_RGB_AMD                               0x8C92
 #define KTX_ATC_RGBA_EXPLICIT_ALPHA_AMD               0x8C93
 #define KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD           0x87EE
-#define KTX_COMPRESSED_RGBA_ASTC_4x4_KHR			  0x93B0
+#define KTX_COMPRESSED_RGBA_ASTC_4x4_KHR              0x93B0
 #define KTX_COMPRESSED_RGBA_ASTC_5x4_KHR              0x93B1
 #define KTX_COMPRESSED_RGBA_ASTC_5x5_KHR              0x93B2
 #define KTX_COMPRESSED_RGBA_ASTC_6x5_KHR              0x93B3
@@ -4918,25 +4918,32 @@ namespace bimg
 		case TextureFormat::ASTC12x12:
 			if (BX_ENABLED(BIMG_DECODE_ASTC) )
 			{
-					const unsigned int thread_count = 1;
 					const bimg::ImageBlockInfo& astcBlockInfo = bimg::getBlockInfo(_srcFormat);
-					const float quality = ASTCENC_PRE_MEDIUM;
-					const astcenc_profile profile = ASTCENC_PRF_LDR; //Linear LDR color profile
-					astcenc_error status;
 
-					//Create and init config and context
 					astcenc_config config{};
-					const unsigned int astcFlags = ASTCENC_FLG_DECOMPRESS_ONLY;
-					status = astcenc_config_init(profile, astcBlockInfo.blockWidth, astcBlockInfo.blockHeight, 1, quality, astcFlags, &config);
-					if (status != ASTCENC_SUCCESS) {
+
+					astcenc_error status = astcenc_config_init(
+						  ASTCENC_PRF_LDR
+						, astcBlockInfo.blockWidth
+						, astcBlockInfo.blockHeight
+						, 1
+						, ASTCENC_PRE_MEDIUM
+						, ASTCENC_FLG_DECOMPRESS_ONLY
+						, &config
+						);
+
+					if (status != ASTCENC_SUCCESS)
+					{
 						BX_TRACE("astc error in config init %s", astcenc_get_error_string(status));
 						imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffff00) );
 						break;
 					}
 
 					astcenc_context* context;
-					status = astcenc_context_alloc(&config, thread_count, &context);
-					if (status != ASTCENC_SUCCESS) {
+					status = astcenc_context_alloc(&config, 1, &context);
+
+					if (status != ASTCENC_SUCCESS)
+					{
 						BX_TRACE("astc error in context alloc %s", astcenc_get_error_string(status));
 						imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffff00) );
 						break;
@@ -4944,21 +4951,36 @@ namespace bimg
 
 					//Put image data into an astcenc_image
 					astcenc_image image{};
-					image.dim_x = _width;
-					image.dim_y = _height;
-					image.dim_z = 1;
+					image.dim_x     = _width;
+					image.dim_y     = _height;
+					image.dim_z     = 1;
 					image.data_type = ASTCENC_TYPE_U8;
-					image.data = &_dst;
+					image.data      = &_dst;
+
 					const uint32_t size = imageGetSize(NULL, uint16_t(_width), uint16_t(_height), 0, false, false, 1, _srcFormat);
 
-					static const astcenc_swizzle swizzle { //0123/rgba swizzle corresponds to ASTC_RGBA
-						ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A
+					static const astcenc_swizzle swizzle
+					{   //0123/rgba swizzle corresponds to ASTC_RGBA
+						ASTCENC_SWZ_R,
+						ASTCENC_SWZ_G,
+						ASTCENC_SWZ_B,
+						ASTCENC_SWZ_A,
 					};
-					status = astcenc_decompress_image(context, static_cast<const uint8_t*>(_src), size, &image, &swizzle, 0);
-					
-					if (status != ASTCENC_SUCCESS) {
+
+					status = astcenc_decompress_image(
+						  context
+						, (const uint8_t*)_src
+						, size
+						, &image
+						, &swizzle
+						, 0
+						);
+
+					if (status != ASTCENC_SUCCESS)
+					{
 						BX_TRACE("astc error in compress image %s", astcenc_get_error_string(status));
 						imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffff00) );
+
 						astcenc_context_free(context);
 						break;
 					}
diff --git a/src/image_encode.cpp b/src/image_encode.cpp
index 39f8ae1..e006940 100644
--- a/src/image_encode.cpp
+++ b/src/image_encode.cpp
@@ -52,7 +52,7 @@ namespace bimg
 		ASTCENC_PRE_THOROUGH,     // Highest
 		ASTCENC_PRE_FAST,         // Fastest
 	};
-	BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_astcQuality));
+	BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_astcQuality) );
 
 	void imageEncodeFromRgba8(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err)
 	{
@@ -156,61 +156,83 @@ namespace bimg
 			case TextureFormat::ASTC12x10:
 			case TextureFormat::ASTC12x12:
 				{
-					const unsigned int thread_count = 1;
 					const bimg::ImageBlockInfo& astcBlockInfo = bimg::getBlockInfo(_format);
-					const float quality = s_astcQuality[_quality];
-					const astcenc_profile profile = ASTCENC_PRF_LDR; //Linear LDR color profile
-					astcenc_error status;
 
-					//Create and init config and context
 					astcenc_config config{};
-					unsigned int astcFlags = ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
-					if (Quality::NormalMapDefault <= _quality) {
+
+					uint32_t astcFlags = ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
+
+					if (Quality::NormalMapDefault <= _quality)
+					{
 						astcFlags |= ASTCENC_FLG_MAP_NORMAL;
 					}
-					status = astcenc_config_init(profile, astcBlockInfo.blockWidth, astcBlockInfo.blockHeight, 1, quality, astcFlags, &config);
-					if (status != ASTCENC_SUCCESS) {
-						BX_TRACE("astc error in config init %s", astcenc_get_error_string(status));
+
+					astcenc_error status = astcenc_config_init(
+						  ASTCENC_PRF_LDR
+						, astcBlockInfo.blockWidth
+						, astcBlockInfo.blockHeight
+						, 1
+						, s_astcQuality[_quality]
+						, astcFlags
+						, &config
+						);
+
+					if (status != ASTCENC_SUCCESS)
+					{
+						BX_TRACE("astc error in config init %s", astcenc_get_error_string(status) );
 						BX_ERROR_SET(_err, BIMG_ERROR, "Unable to initialize astc config!");
 						break;
 					}
 
 					astcenc_context* context;
-					status = astcenc_context_alloc(&config, thread_count, &context);
-					if (status != ASTCENC_SUCCESS) {
-						BX_TRACE("astc error in context alloc %s", astcenc_get_error_string(status));
+					status = astcenc_context_alloc(&config, 1, &context);
+
+					if (status != ASTCENC_SUCCESS)
+					{
+						BX_TRACE("astc error in context alloc %s", astcenc_get_error_string(status) );
 						BX_ERROR_SET(_err, BIMG_ERROR, "Unable to alloc astc context!");
 						break;
 					}
 
-					//Put image data into an astcenc_image
 					astcenc_image image{};
-					image.dim_x = _width;
-					image.dim_y = _height;
-					image.dim_z = 1;
+					image.dim_x     = _width;
+					image.dim_y     = _height;
+					image.dim_z     = 1;
 					image.data_type = ASTCENC_TYPE_U8;
-					image.data = reinterpret_cast<void**>(const_cast<uint8_t**>(&src));
+					image.data      = (void**)&src;
 
-					const size_t block_count_x = (_width + astcBlockInfo.blockWidth - 1) / astcBlockInfo.blockWidth;
-					const size_t block_count_y = (_height + astcBlockInfo.blockHeight - 1) / astcBlockInfo.blockHeight;
-					const size_t comp_len = block_count_x * block_count_y * 16;
+					const size_t blockCountX = (_width  + astcBlockInfo.blockWidth  - 1) / astcBlockInfo.blockWidth;
+					const size_t blockCountY = (_height + astcBlockInfo.blockHeight - 1) / astcBlockInfo.blockHeight;
+					const size_t compLen     = blockCountX * blockCountY * 16;
 
 					if (Quality::NormalMapDefault <= _quality)
 					{
-						static const astcenc_swizzle swizzle { //0001/rrrg swizzle corresponds to ASTC_ENC_NORMAL_RA
-							ASTCENC_SWZ_R, ASTCENC_SWZ_R, ASTCENC_SWZ_R, ASTCENC_SWZ_G
+						static const astcenc_swizzle swizzle
+						{  //0001/rrrg swizzle corresponds to ASTC_ENC_NORMAL_RA
+							ASTCENC_SWZ_R,
+							ASTCENC_SWZ_R,
+							ASTCENC_SWZ_R,
+							ASTCENC_SWZ_G,
 						};
-						status = astcenc_compress_image(context, &image, &swizzle, dst, comp_len, 0);
+
+						status = astcenc_compress_image(context, &image, &swizzle, dst, compLen, 0);
 					}
 					else
 					{
-						static const astcenc_swizzle swizzle { //0123/rgba swizzle corresponds to ASTC_RGBA
-							ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A
+						static const astcenc_swizzle swizzle
+						{  //0123/rgba swizzle corresponds to ASTC_RGBA
+							ASTCENC_SWZ_R,
+							ASTCENC_SWZ_G,
+							ASTCENC_SWZ_B,
+							ASTCENC_SWZ_A,
 						};
-						status = astcenc_compress_image(context, &image, &swizzle, dst, comp_len, 0);
+
+						status = astcenc_compress_image(context, &image, &swizzle, dst, compLen, 0);
 					}
-					if (status != ASTCENC_SUCCESS) {
-						BX_TRACE("astc error in compress image %s", astcenc_get_error_string(status));
+
+					if (status != ASTCENC_SUCCESS)
+					{
+						BX_TRACE("astc error in compress image %s", astcenc_get_error_string(status) );
 						BX_ERROR_SET(_err, BIMG_ERROR, "Unable to compress astc image!");
 						astcenc_context_free(context);
 						break;