Updated astc-encoder.

This commit is contained in:
Бранимир Караџић
2023-01-01 19:16:52 -08:00
parent 6a15b31f19
commit 1395f4e969
17 changed files with 201 additions and 232 deletions

View File

@@ -514,25 +514,15 @@ struct astcenc_config
float tune_db_limit;
/**
* @brief The amount of overshoot needed to early-out mode 0 fast path.
* @brief The amount of MSE overshoot needed to early-out trials.
*
* We have a fast-path for mode 0 (1 partition, 1 plane) which uses only essential block modes
* as an initial search. This can short-cut compression for simple blocks, but to avoid
* short-cutting too much we force this to overshoot the MSE threshold needed to hit the
* block-local db_limit e.g. 1.0 = no overshoot, 2.0 = need half the error to trigger.
*/
float tune_mode0_mse_overshoot;
/**
* @brief The amount of overshoot needed to early-out refinement.
* The first early-out is for 1 partition, 1 plane trials, where we try a minimal encode using
* the high probability block modes. This can short-cut compression for simple blocks.
*
* The codec will refine block candidates iteratively to improve the encoding, based on the
* @c tune_refinement_limit count. Earlier implementations will use all refinement iterations,
* even if the target threshold is reached. This tuning parameter allows an early out, but with
* an overshoot MSE threshold. Setting this to 1.0 will early-out as soon as the target is hit,
* but does reduce image quality vs the default behavior of over-refinement.
* The second early-out is for refinement trials, where we can exit refinement once quality is
* reached.
*/
float tune_refinement_mse_overshoot;
float tune_mse_overshoot;
/**
* @brief The threshold for skipping 3.1/4.1 trials (-2partitionlimitfactor).

View File

@@ -1960,7 +1960,7 @@ uint8_t pack_color_endpoints(
switch (format)
{
case FMT_RGB:
if (quant_level <= 18)
if (quant_level <= QUANT_160)
{
if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level))
{
@@ -1973,7 +1973,7 @@ uint8_t pack_color_endpoints(
break;
}
}
if (try_quantize_rgb_blue_contract(color0, color1, output, quant_level))
if (quant_level < QUANT_256 && try_quantize_rgb_blue_contract(color0, color1, output, quant_level))
{
retval = FMT_RGB;
break;
@@ -1983,7 +1983,7 @@ uint8_t pack_color_endpoints(
break;
case FMT_RGBA:
if (quant_level <= 18)
if (quant_level <= QUANT_160)
{
if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level))
{
@@ -1996,7 +1996,7 @@ uint8_t pack_color_endpoints(
break;
}
}
if (try_quantize_rgba_blue_contract(color0, color1, output, quant_level))
if (quant_level < QUANT_256 && try_quantize_rgba_blue_contract(color0, color1, output, quant_level))
{
retval = FMT_RGBA;
break;

View File

@@ -82,7 +82,7 @@ static bool realign_weights_undecimated(
const quant_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_level];
unsigned int max_plane = bm.is_dual_plane;
int plane2_component = bm.is_dual_plane ? scb.plane2_component : -1;
int plane2_component = scb.plane2_component;
vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component);
// Decode the color endpoints
@@ -206,7 +206,7 @@ static bool realign_weights_decimated(
assert(weight_count != bsd.texel_count);
unsigned int max_plane = bm.is_dual_plane;
int plane2_component = bm.is_dual_plane ? scb.plane2_component : -1;
int plane2_component = scb.plane2_component;
vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component);
// Decode the color endpoints
@@ -1279,13 +1279,13 @@ void compress_block(
// compression and slightly reduces image quality.
float errorval_mult[2] {
1.0f / ctx.config.tune_mode0_mse_overshoot,
1.0f / ctx.config.tune_mse_overshoot,
1.0f
};
static const float errorval_overshoot = 1.0f / ctx.config.tune_refinement_mse_overshoot;
static const float errorval_overshoot = 1.0f / ctx.config.tune_mse_overshoot;
// Only enable MODE0 fast path (trial 0) if 2D and more than 25 texels
// Only enable MODE0 fast path (trial 0) if 2D, and more than 25 texels
int start_trial = 1;
if ((bsd.texel_count >= TUNE_MIN_TEXELS_MODE0_FASTPATH) && (bsd.zdim == 1))
{

View File

@@ -286,7 +286,7 @@ void decompress_symbolic_block(
unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);
// Now that we have endpoint colors and weights, we can unpack texel colors
int plane2_component = is_dual_plane ? scb.plane2_component : -1;
int plane2_component = scb.plane2_component;
vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component);
for (int i = 0; i < partition_count; i++)

View File

@@ -51,89 +51,84 @@ struct astcenc_preset_config
unsigned int tune_4partitioning_candidate_limit;
float tune_db_limit_a_base;
float tune_db_limit_b_base;
float tune_mode0_mse_overshoot;
float tune_refinement_mse_overshoot;
float tune_mse_overshoot;
float tune_2_partition_early_out_limit_factor;
float tune_3_partition_early_out_limit_factor;
float tune_2_plane_early_out_limit_correlation;
};
/**
* @brief The static quality presets that are built-in for high bandwidth
* presets (x < 25 texels per block).
* @brief The static presets for high bandwidth encodings (x < 25 texels per block).
*/
static const std::array<astcenc_preset_config, 6> preset_configs_high {{
{
ASTCENC_PRE_FASTEST,
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
}, {
ASTCENC_PRE_FAST,
3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.90f
3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f
}, {
ASTCENC_PRE_MEDIUM,
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 2.5f, 1.1f, 1.05f, 0.95f
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f
}, {
ASTCENC_PRE_THOROUGH,
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.35f, 1.15f, 0.97f
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f
}, {
ASTCENC_PRE_VERYTHOROUGH,
4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
}, {
ASTCENC_PRE_EXHAUSTIVE,
4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
}
}};
/**
* @brief The static quality presets that are built-in for medium bandwidth
* presets (25 <= x < 64 texels per block).
* @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
*/
static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
{
ASTCENC_PRE_FASTEST,
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.80f
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f
}, {
ASTCENC_PRE_FAST,
3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
}, {
ASTCENC_PRE_MEDIUM,
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 3.0f, 1.1f, 1.05f, 0.90f
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f
}, {
ASTCENC_PRE_THOROUGH,
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.4f, 1.2f, 0.95f
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f
}, {
ASTCENC_PRE_VERYTHOROUGH,
4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
}, {
ASTCENC_PRE_EXHAUSTIVE,
4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
}
}};
/**
* @brief The static quality presets that are built-in for low bandwidth
* presets (64 <= x texels per block).
* @brief The static presets for low bandwidth encodings (64 <= x texels per block).
*/
static const std::array<astcenc_preset_config, 6> preset_configs_low {{
{
ASTCENC_PRE_FASTEST,
2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.80f
2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f
}, {
ASTCENC_PRE_FAST,
2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.85f
2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f
}, {
ASTCENC_PRE_MEDIUM,
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 3.5f, 1.1f, 1.05f, 0.90f
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f
}, {
ASTCENC_PRE_THOROUGH,
4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 10.0f, 1.3f, 1.2f, 0.97f
4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f
}, {
ASTCENC_PRE_VERYTHOROUGH,
4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 10.0f, 1.6f, 1.4f, 0.98f
4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
}, {
ASTCENC_PRE_EXHAUSTIVE,
4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 10.0f, 2.0f, 2.0f, 0.99f
4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
}
}};
@@ -444,8 +439,7 @@ static astcenc_error validate_config(
config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIIONING_CANDIDATES);
config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
config.tune_mode0_mse_overshoot = astc::max(config.tune_mode0_mse_overshoot, 1.0f);
config.tune_refinement_mse_overshoot = astc::max(config.tune_refinement_mse_overshoot, 1.0f);
config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
config.tune_2_partition_early_out_limit_factor = astc::max(config.tune_2_partition_early_out_limit_factor, 0.0f);
config.tune_3_partition_early_out_limit_factor = astc::max(config.tune_3_partition_early_out_limit_factor, 0.0f);
config.tune_2_plane_early_out_limit_correlation = astc::max(config.tune_2_plane_early_out_limit_correlation, 0.0f);
@@ -568,8 +562,7 @@ astcenc_error astcenc_config_init(
config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
(*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
config.tune_mode0_mse_overshoot = (*preset_configs)[start].tune_mode0_mse_overshoot;
config.tune_refinement_mse_overshoot = (*preset_configs)[start].tune_refinement_mse_overshoot;
config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor;
config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor;
@@ -611,8 +604,7 @@ astcenc_error astcenc_config_init(
config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
LERP(tune_db_limit_b_base) - 19 * ltexels);
config.tune_mode0_mse_overshoot = LERP(tune_mode0_mse_overshoot);
config.tune_refinement_mse_overshoot = LERP(tune_refinement_mse_overshoot);
config.tune_mse_overshoot = LERP(tune_mse_overshoot);
config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor);
config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor);

View File

@@ -362,6 +362,7 @@ static void count_partition_mismatch_bits(
unsigned int mismatch_counts[BLOCK_MAX_PARTITIONINGS]
) {
unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1];
promise(active_count > 0);
if (partition_count == 2)
{
@@ -400,6 +401,7 @@ static unsigned int get_partition_ordering_by_mismatch_bits(
const unsigned int mismatch_count[BLOCK_MAX_PARTITIONINGS],
unsigned int partition_ordering[BLOCK_MAX_PARTITIONINGS]
) {
promise(partitioning_count > 0);
unsigned int mscount[256] { 0 };
// Create the histogram of mismatch counts
@@ -488,7 +490,7 @@ static unsigned int compute_kmeans_partition_ordering(
/**
* @brief Insert a partitioning into an ordered list of results, sorted by error.
*
* @param max_values The max number of entries in the best result arrays/
* @param max_values The max number of entries in the best result arrays.
* @param this_error The error of the new entry.
* @param this_partition The partition ID of the new entry.
* @param[out] best_errors The array of best error values.
@@ -501,6 +503,8 @@ static void insert_result(
float* best_errors,
unsigned int* best_partitions)
{
promise(max_values > 0);
// Don't bother searching if the current worst error beats the new error
if (this_error >= best_errors[max_values - 1])
{
@@ -508,7 +512,7 @@ static void insert_result(
}
// Else insert into the list in error-order
for (unsigned int i = 0; i < max_values; i++)
for (unsigned int i = 0; i < max_values; i++)
{
// Existing result is better - move on ...
if (this_error > best_errors[i])

View File

@@ -692,6 +692,7 @@ float compute_error_of_weight_set_1plane(
) {
vfloatacc error_summav = vfloatacc::zero();
unsigned int texel_count = di.texel_count;
promise(texel_count > 0);
// Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized
if (di.max_texel_weight_count > 2)
@@ -757,6 +758,7 @@ float compute_error_of_weight_set_2planes(
) {
vfloatacc error_summav = vfloatacc::zero();
unsigned int texel_count = di.texel_count;
promise(texel_count > 0);
// Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized
if (di.max_texel_weight_count > 2)
@@ -861,8 +863,7 @@ void compute_ideal_weights_for_decimation(
// zero-initialized SIMD over-fetch region
if (is_direct)
{
unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count);
for (unsigned int i = 0; i < texel_count_simd; i += ASTCENC_SIMD_WIDTH)
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vfloat weight(ei.weights + i);
storea(weight, dec_weight_ideal_value + i);
@@ -970,7 +971,7 @@ void compute_ideal_weights_for_decimation(
vfloat step = (error_change1 * chd_scale) / error_change0;
step = clamp(-stepsize, stepsize, step);
// Update the weight; note this can store negative values.
// Update the weight; note this can store negative values
storea(weight_val + step, dec_weight_ideal_value + i);
}
}
@@ -1215,7 +1216,7 @@ void recompute_ideal_colors_1plane(
// Only compute a partition mean if more than one partition
if (partition_count > 1)
{
rgba_sum = vfloat4(1e-17f);
rgba_sum = vfloat4::zero();
promise(texel_count > 0);
for (unsigned int j = 0; j < texel_count; j++)
{
@@ -1251,7 +1252,6 @@ void recompute_ideal_colors_1plane(
for (unsigned int j = 0; j < texel_count; j++)
{
unsigned int tix = texel_indexes[j];
vfloat4 rgba = blk.texel(tix);
float idx0 = undec_weight_ref[tix];
@@ -1284,9 +1284,6 @@ void recompute_ideal_colors_1plane(
vfloat4 right_sum = vfloat4(right_sum_s) * color_weight;
vfloat4 lmrs_sum = vfloat3(left_sum_s, middle_sum_s, right_sum_s) * ls_weight;
vfloat4 weight_weight_sum = vfloat4(weight_weight_sum_s) * color_weight;
float psum = right_sum_s * hadd_rgb_s(color_weight);
color_vec_x = color_vec_x * color_weight;
color_vec_y = color_vec_y * color_weight;
@@ -1349,26 +1346,32 @@ void recompute_ideal_colors_1plane(
}
}
// Calculations specific to mode #7, the HDR RGB-scale mode
vfloat4 rgbq_sum = color_vec_x + color_vec_y;
rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
vfloat4 rgbovec = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
rgbo_vectors[i] = rgbovec;
// We can get a failure due to the use of a singular (non-invertible) matrix
// If it failed, compute rgbo_vectors[] with a different method ...
if (astc::isnan(dot_s(rgbovec, rgbovec)))
// Calculations specific to mode #7, the HDR RGB-scale mode - skip if known LDR
if (blk.rgb_lns[0] || blk.alpha_lns[0])
{
vfloat4 v0 = ep.endpt0[i];
vfloat4 v1 = ep.endpt1[i];
vfloat4 weight_weight_sum = vfloat4(weight_weight_sum_s) * color_weight;
float psum = right_sum_s * hadd_rgb_s(color_weight);
float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
avgdif = astc::max(avgdif, 0.0f);
vfloat4 rgbq_sum = color_vec_x + color_vec_y;
rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
vfloat4 avg = (v0 + v1) * 0.5f;
vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
vfloat4 rgbovec = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
rgbo_vectors[i] = rgbovec;
// We can get a failure due to the use of a singular (non-invertible) matrix
// If it failed, compute rgbo_vectors[] with a different method ...
if (astc::isnan(dot_s(rgbovec, rgbovec)))
{
vfloat4 v0 = ep.endpt0[i];
vfloat4 v1 = ep.endpt1[i];
float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
avgdif = astc::max(avgdif, 0.0f);
vfloat4 avg = (v0 + v1) * 0.5f;
vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
}
}
}
}
@@ -1516,7 +1519,7 @@ void recompute_ideal_colors_2planes(
color_vec_x += cwprod - cwiprod;
scale_vec += vfloat2(om_idx0, idx0) * (ls_weight * scale);
weight_weight_sum += (color_weight * color_idx);
weight_weight_sum += color_idx;
}
vfloat4 left1_sum = vfloat4(left1_sum_s) * color_weight;
@@ -1528,8 +1531,6 @@ void recompute_ideal_colors_2planes(
vfloat4 middle2_sum = vfloat4(middle2_sum_s) * color_weight;
vfloat4 right2_sum = vfloat4(right2_sum_s) * color_weight;
float psum = dot3_s(select(right1_sum, right2_sum, p2_mask), color_weight);
color_vec_x = color_vec_x * color_weight;
color_vec_y = color_vec_y * color_weight;
@@ -1630,26 +1631,32 @@ void recompute_ideal_colors_2planes(
ep.endpt1[0] = select(ep.endpt1[0], ep1, full_mask);
}
// Calculations specific to mode #7, the HDR RGB-scale mode
vfloat4 rgbq_sum = color_vec_x + color_vec_y;
rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
rgbo_vector = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
// We can get a failure due to the use of a singular (non-invertible) matrix
// If it failed, compute rgbo_vectors[] with a different method ...
if (astc::isnan(dot_s(rgbo_vector, rgbo_vector)))
// Calculations specific to mode #7, the HDR RGB-scale mode - skip if known LDR
if (blk.rgb_lns[0] || blk.alpha_lns[0])
{
vfloat4 v0 = ep.endpt0[0];
vfloat4 v1 = ep.endpt1[0];
weight_weight_sum = weight_weight_sum * color_weight;
float psum = dot3_s(select(right1_sum, right2_sum, p2_mask), color_weight);
float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
avgdif = astc::max(avgdif, 0.0f);
vfloat4 rgbq_sum = color_vec_x + color_vec_y;
rgbq_sum.set_lane<3>(hadd_rgb_s(color_vec_y));
vfloat4 avg = (v0 + v1) * 0.5f;
vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
rgbo_vector = compute_rgbo_vector(rgba_weight_sum, weight_weight_sum, rgbq_sum, psum);
rgbo_vector = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
// We can get a failure due to the use of a singular (non-invertible) matrix
// If it failed, compute rgbo_vectors[] with a different method ...
if (astc::isnan(dot_s(rgbo_vector, rgbo_vector)))
{
vfloat4 v0 = ep.endpt0[0];
vfloat4 v1 = ep.endpt1[0];
float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f);
avgdif = astc::max(avgdif, 0.0f);
vfloat4 avg = (v0 + v1) * 0.5f;
vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f;
rgbo_vector = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif);
}
}
}

View File

@@ -24,6 +24,7 @@
#include <array>
/** @brief Unpacked quint triplets <low,middle,high> for each packed value */
// TODO: Bitpack these into a uint16_t?
static const uint8_t quints_of_integer[128][3] {
{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
{4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
@@ -99,6 +100,7 @@ static const uint8_t integer_of_quints[5][5][5] {
};
/** @brief Unpacked trit quintuplets <low,...,high> for each packed value */
// TODO: Bitpack these into a uint16_t?
static const uint8_t trits_of_integer[256][5] {
{0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
{0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
@@ -334,44 +336,41 @@ static const uint8_t integer_of_trits[3][3][3][3][3] {
*/
struct btq_count
{
/** @brief The quantization level. */
uint8_t quant;
/** @brief The number of bits. */
uint8_t bits;
uint8_t bits:6;
/** @brief The number of trits. */
uint8_t trits;
uint8_t trits:1;
/** @brief The number of quints. */
uint8_t quints;
uint8_t quints:1;
};
/**
* @brief The table of bits, trits, and quints needed for a quant encode.
*/
static const std::array<btq_count, 21> btq_counts {{
{ QUANT_2, 1, 0, 0 },
{ QUANT_3, 0, 1, 0 },
{ QUANT_4, 2, 0, 0 },
{ QUANT_5, 0, 0, 1 },
{ QUANT_6, 1, 1, 0 },
{ QUANT_8, 3, 0, 0 },
{ QUANT_10, 1, 0, 1 },
{ QUANT_12, 2, 1, 0 },
{ QUANT_16, 4, 0, 0 },
{ QUANT_20, 2, 0, 1 },
{ QUANT_24, 3, 1, 0 },
{ QUANT_32, 5, 0, 0 },
{ QUANT_40, 3, 0, 1 },
{ QUANT_48, 4, 1, 0 },
{ QUANT_64, 6, 0, 0 },
{ QUANT_80, 4, 0, 1 },
{ QUANT_96, 5, 1, 0 },
{ QUANT_128, 7, 0, 0 },
{ QUANT_160, 5, 0, 1 },
{ QUANT_192, 6, 1, 0 },
{ QUANT_256, 8, 0, 0 }
{ 1, 0, 0 }, // QUANT_2
{ 0, 1, 0 }, // QUANT_3
{ 2, 0, 0 }, // QUANT_4
{ 0, 0, 1 }, // QUANT_5
{ 1, 1, 0 }, // QUANT_6
{ 3, 0, 0 }, // QUANT_8
{ 1, 0, 1 }, // QUANT_10
{ 2, 1, 0 }, // QUANT_12
{ 4, 0, 0 }, // QUANT_16
{ 2, 0, 1 }, // QUANT_20
{ 3, 1, 0 }, // QUANT_24
{ 5, 0, 0 }, // QUANT_32
{ 3, 0, 1 }, // QUANT_40
{ 4, 1, 0 }, // QUANT_48
{ 6, 0, 0 }, // QUANT_64
{ 4, 0, 1 }, // QUANT_80
{ 5, 1, 0 }, // QUANT_96
{ 7, 0, 0 }, // QUANT_128
{ 5, 0, 1 }, // QUANT_160
{ 6, 1, 0 }, // QUANT_192
{ 8, 0, 0 } // QUANT_256
}};
/**
@@ -382,44 +381,38 @@ static const std::array<btq_count, 21> btq_counts {{
*/
struct ise_size
{
/** @brief The quantization level. */
uint8_t quant;
/** @brief The scaling parameter. */
uint8_t scale;
/** @brief The rounding parameter. */
uint8_t round;
uint8_t scale:6;
/** @brief The divisor parameter. */
uint8_t divisor;
uint8_t divisor:2;
};
/**
* @brief The table of scale and divisor values needed for quant sizing.
*/
static const std::array<ise_size, 21> ise_sizes {{
{ QUANT_2, 1, 0, 1 },
{ QUANT_3, 8, 4, 5 },
{ QUANT_4, 2, 0, 1 },
{ QUANT_5, 7, 2, 3 },
{ QUANT_6, 13, 4, 5 },
{ QUANT_8, 3, 0, 1 },
{ QUANT_10, 10, 2, 3 },
{ QUANT_12, 18, 4, 5 },
{ QUANT_16, 4, 0, 1 },
{ QUANT_20, 13, 2, 3 },
{ QUANT_24, 23, 4, 5 },
{ QUANT_32, 5, 0, 1 },
{ QUANT_40, 16, 2, 3 },
{ QUANT_48, 28, 4, 5 },
{ QUANT_64, 6, 0, 1 },
{ QUANT_80, 19, 2, 3 },
{ QUANT_96, 33, 4, 5 },
{ QUANT_128, 7, 0, 1 },
{ QUANT_160, 22, 2, 3 },
{ QUANT_192, 38, 4, 5 },
{ QUANT_256, 8, 0, 1 }
{ 1, 0 }, // QUANT_2
{ 8, 2 }, // QUANT_3
{ 2, 0 }, // QUANT_4
{ 7, 1 }, // QUANT_5
{ 13, 2 }, // QUANT_6
{ 3, 0 }, // QUANT_8
{ 10, 1 }, // QUANT_10
{ 18, 2 }, // QUANT_12
{ 4, 0 }, // QUANT_16
{ 13, 1 }, // QUANT_20
{ 23, 2 }, // QUANT_24
{ 5, 0 }, // QUANT_32
{ 16, 1 }, // QUANT_40
{ 28, 2 }, // QUANT_48
{ 6, 0 }, // QUANT_64
{ 19, 1 }, // QUANT_80
{ 33, 2 }, // QUANT_96
{ 7, 0 }, // QUANT_128
{ 22, 1 }, // QUANT_160
{ 38, 2 }, // QUANT_192
{ 8, 0 } // QUANT_256
}};
/* See header for documentation. */
@@ -435,7 +428,8 @@ unsigned int get_ise_sequence_bitcount(
}
auto& entry = ise_sizes[quant_level];
return (entry.scale * character_count + entry.round) / entry.divisor;
unsigned int divisor = (entry.divisor << 1) + 1;
return (entry.scale * character_count + divisor - 1) / divisor;
}
/**
@@ -645,7 +639,6 @@ void encode_ise(
// Write out just bits
else
{
promise(character_count > 0);
for (unsigned int i = 0; i < character_count; i++)
{
write_bits(input_data[i], bits, bit_offset, output_data);
@@ -685,10 +678,10 @@ void decode_ise(
if (trits)
{
static const unsigned int bits_to_read[5] { 2, 2, 1, 2, 1 };
static const unsigned int block_shift[5] { 0, 2, 4, 5, 7 };
static const unsigned int next_lcounter[5] { 1, 2, 3, 4, 0 };
static const unsigned int hcounter_incr[5] { 0, 0, 0, 0, 1 };
static const uint8_t bits_to_read[5] { 2, 2, 1, 2, 1 };
static const uint8_t block_shift[5] { 0, 2, 4, 5, 7 };
static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 };
static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 };
unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
bit_offset += bits_to_read[lcounter];
tq_blocks[hcounter] |= tdata << block_shift[lcounter];
@@ -698,10 +691,10 @@ void decode_ise(
if (quints)
{
static const unsigned int bits_to_read[3] { 3, 2, 2 };
static const unsigned int block_shift[3] { 0, 3, 5 };
static const unsigned int next_lcounter[3] { 1, 2, 0 };
static const unsigned int hcounter_incr[3] { 0, 0, 1 };
static const uint8_t bits_to_read[3] { 3, 2, 2 };
static const uint8_t block_shift[3] { 0, 3, 5 };
static const uint8_t next_lcounter[3] { 1, 2, 0 };
static const uint8_t hcounter_incr[3] { 0, 0, 1 };
unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
bit_offset += bits_to_read[lcounter];
tq_blocks[hcounter] |= tdata << block_shift[lcounter];
@@ -714,6 +707,7 @@ void decode_ise(
if (trits)
{
unsigned int trit_blocks = (character_count + 4) / 5;
promise(trit_blocks > 0);
for (unsigned int i = 0; i < trit_blocks; i++)
{
const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
@@ -728,6 +722,7 @@ void decode_ise(
if (quints)
{
unsigned int quint_blocks = (character_count + 2) / 3;
promise(quint_blocks > 0);
for (unsigned int i = 0; i < quint_blocks; i++)
{
const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];

View File

@@ -1008,9 +1008,6 @@ struct dt_init_working_buffers
*/
struct quant_and_transfer_table
{
/** @brief The quantization level used. */
quant_method method;
/** @brief The unscrambled unquantized value. */
int8_t quant_to_unquant[32];

View File

@@ -15,13 +15,13 @@
// under the License.
// ----------------------------------------------------------------------------
#include "astcenc_mathlib.h"
/**
* @brief Soft-float library for IEEE-754.
*/
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
#include "astcenc_mathlib.h"
/* sized soft-float types. These are mapped to the sized integer
types of C99, instead of C's floating-point types; this is because
the library needs to maintain exact, bit-level control on all

View File

@@ -1166,11 +1166,11 @@ const float *get_2d_percentile_table(
unsigned int xdim,
unsigned int ydim
) {
float* unpacked_table = new float[2048];
float* unpacked_table = new float[WEIGHTS_MAX_BLOCK_MODES];
const packed_percentile_table *apt = get_packed_table(xdim, ydim);
// Set the default percentile
for (unsigned int i = 0; i < 2048; i++)
for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
{
unpacked_table[i] = 1.0f;
}

View File

@@ -325,11 +325,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level(
) {
int partition_size = pi.partition_texel_count[partition_index];
static const float baseline_quant_error[21] {
(65536.0f * 65536.0f / 18.0f), // 2 values, 1 step
(65536.0f * 65536.0f / 18.0f) / (2 * 2), // 3 values, 2 steps
(65536.0f * 65536.0f / 18.0f) / (3 * 3), // 4 values, 3 steps
(65536.0f * 65536.0f / 18.0f) / (4 * 4), // 5 values
static const float baseline_quant_error[21 - QUANT_6] {
(65536.0f * 65536.0f / 18.0f) / (5 * 5),
(65536.0f * 65536.0f / 18.0f) / (7 * 7),
(65536.0f * 65536.0f / 18.0f) / (9 * 9),
@@ -528,7 +524,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level(
// The base_quant_error should depend on the scale-factor that would be used during
// actual encode of the color value
float base_quant_error = baseline_quant_error[i] * static_cast<float>(partition_size);
float base_quant_error = baseline_quant_error[i - QUANT_6] * static_cast<float>(partition_size);
float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f;
float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
@@ -591,7 +587,7 @@ static void compute_color_error_for_every_integer_count_and_quant_level(
error_scale_oe_rgb = 1.0f;
}
float base_quant_error = baseline_quant_error[i];
float base_quant_error = baseline_quant_error[i - QUANT_6];
float quant_error_rgb = base_quant_error_rgb * base_quant_error;
float quant_error_rgba = base_quant_error_rgba * base_quant_error;
@@ -1136,22 +1132,19 @@ unsigned int compute_ideal_endpoint_formats(
uint8_t (&best_ep_formats)[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS] = tmpbuf.best_ep_formats;
// Ensure that the first iteration understep contains data that will never be picked
vfloat clear_error(ERROR_CALC_DEFAULT);
vint clear_quant(0);
unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode);
for (unsigned int i = packed_start_block_mode; i < start_block_mode; i++)
{
errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
best_quant_levels[i] = QUANT_2;
best_quant_levels_mod[i] = QUANT_2;
}
storea(clear_error, errors_of_best_combination + packed_start_block_mode);
store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode);
store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode);
// Ensure that last iteration overstep contains data that will never be picked
const unsigned int packed_end_block_mode = round_up_to_simd_multiple_vla(end_block_mode);
for (unsigned int i = end_block_mode; i < packed_end_block_mode; i++)
{
errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
best_quant_levels[i] = QUANT_2;
best_quant_levels_mod[i] = QUANT_2;
}
unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1);
storea(clear_error, errors_of_best_combination + packed_end_block_mode);
store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode);
store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode);
// Track a scalar best to avoid expensive search at least once ...
float error_of_best_combination = ERROR_CALC_DEFAULT;

View File

@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2020-2021 Arm Limited
// Copyright 2020-2022 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
@@ -47,7 +47,7 @@ static bool g_cpu_has_f16c { false };
============================================================================ */
#if !defined(__clang__) && defined(_MSC_VER)
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <windows.h>
#include <intrin.h>
/**

View File

@@ -371,12 +371,15 @@ void physical_to_symbolic(
const auto& di = bsd.get_decimation_info(bm.decimation_mode);
int weight_count = di.weight_count;
promise(weight_count > 0);
quant_method weight_quant_method = static_cast<quant_method>(bm.quant_mode);
int is_dual_plane = bm.is_dual_plane;
int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
int partition_count = read_bits(2, 11, pcb.data) + 1;
promise(partition_count > 0);
scb.block_mode = static_cast<uint16_t>(block_mode);
scb.partition_count = static_cast<uint8_t>(partition_count);
@@ -523,6 +526,7 @@ void physical_to_symbolic(
}
// Fetch component for second-plane in the case of dual plane of weights.
scb.plane2_component = -1;
if (is_dual_plane)
{
scb.plane2_component = static_cast<int8_t>(read_bits(2, below_weights_pos - 2, pcb.data));

View File

@@ -26,7 +26,7 @@
* with that is available at compile time. The current vector width is
* accessible for e.g. loop strides via the ASTCENC_SIMD_WIDTH constant.
*
* Explicit scalar types are acessible via the vint1, vfloat1, vmask1 types.
* Explicit scalar types are accessible via the vint1, vfloat1, vmask1 types.
* These are provided primarily for prototyping and algorithm debug of VLA
* implementations.
*
@@ -402,7 +402,7 @@ static ASTCENC_SIMD_INLINE vint4 clz(vint4 a)
// the original integer value into a 2^N encoding we can recover easily.
// Convert to float without risk of rounding up by keeping only top 8 bits.
// This trick is is guranteed to keep top 8 bits and clear the 9th.
// This trick is guaranteed to keep top 8 bits and clear the 9th.
a = (~lsr<8>(a)) & a;
a = float_as_int(int_to_float(a));

View File

@@ -106,7 +106,7 @@ struct vfloat4
*/
template <int l> ASTCENC_SIMD_INLINE void set_lane(float a)
{
m = vld1q_lane_f32(&a, m, l);
m = vsetq_lane_f32(a, m, l);
}
/**
@@ -122,7 +122,7 @@ struct vfloat4
*/
static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p)
{
return vfloat4(vdupq_n_f32(*p));
return vfloat4(vld1q_dup_f32(p));
}
/**
@@ -202,9 +202,8 @@ struct vint4
*/
ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p)
{
uint32x2_t t8 {};
// Cast is safe - NEON loads are allowed to be unaligned
t8 = vld1_lane_u32(reinterpret_cast<const uint32_t*>(p), t8, 0);
uint32x2_t t8 = vld1_dup_u32(reinterpret_cast<const uint32_t*>(p));
uint16x4_t t16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(t8)));
m = vreinterpretq_s32_u32(vmovl_u16(t16));
}
@@ -251,7 +250,7 @@ struct vint4
*/
template <int l> ASTCENC_SIMD_INLINE void set_lane(int a)
{
m = vld1q_lane_s32(&a, m, l);
m = vsetq_lane_s32(a, m, l);
}
/**

View File

@@ -24,9 +24,8 @@
#define _ 0 // Using _ to indicate an entry that will not be used.
const quant_and_transfer_table quant_and_xfer_tables[12] {
// Quantization method 0, range 0..1
// QUANT_2, range 0..1
{
QUANT_2,
{0, 64},
{0, 1},
{0, 64},
@@ -34,9 +33,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
0x4000}
},
// Quantization method 1, range 0..2
// QUANT_3, range 0..2
{
QUANT_3,
{0, 32, 64},
{0, 1, 2},
{0, 32, 64},
@@ -44,19 +42,17 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,_,0x4000,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,
_,_,_,_,0x4020}
},
// Quantization method 2, range 0..3
// QUANT_4, range 0..3
{
QUANT_4,
{0, 21, 43, 64},
{0, 1, 2, 3},
{0, 21, 43, 64},
{0, 21, 43, 64},
{0x1500,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x2b00,_,_,_,_,
_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x4015,_,_,_,_,_,_,_,_,_,_,_,_,
_,_,_,_,_,_,_,_,0x402b}
},
// Quantization method 3, range 0..4
// QUANT_5, range 0..4
{
QUANT_5,
{0, 16, 32, 48, 64},
{0, 1, 2, 3, 4},
{0, 16, 32, 48, 64},
@@ -64,9 +60,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,_,_,_,_,_,0x3010,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,0x4020,_,_,_,
_,_,_,_,_,_,_,_,_,_,_,_,0x4030}
},
// Quantization method 4, range 0..5
// QUANT_6, range 0..5
{
QUANT_6,
{0, 12, 25, 39, 52, 64},
{0, 2, 4, 5, 3, 1},
{0, 64, 12, 52, 25, 39},
@@ -74,9 +69,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
0x270c,_,_,_,_,_,_,_,_,_,_,_,_,_,0x3419,_,_,_,_,_,_,_,_,_,_,
_,_,0x4027,_,_,_,_,_,_,_,_,_,_,_,0x4034}
},
// Quantization method 5, range 0..7
// QUANT_8, range 0..7
{
QUANT_8,
{0, 9, 18, 27, 37, 46, 55, 64},
{0, 1, 2, 3, 4, 5, 6, 7},
{0, 9, 18, 27, 37, 46, 55, 64},
@@ -84,9 +78,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,_,_,_,_,_,0x2512,_,_,_,_,_,_,_,_,_,0x2e1b,_,_,_,_,_,_,_,_,
0x3725,_,_,_,_,_,_,_,_,0x402e,_,_,_,_,_,_,_,_,0x4037}
},
// Quantization method 6, range 0..9
// QUANT_10, range 0..9
{
QUANT_10,
{0, 7, 14, 21, 28, 36, 43, 50, 57, 64},
{0, 2, 4, 6, 8, 9, 7, 5, 3, 1},
{0, 64, 7, 57, 14, 50, 21, 43, 28, 36},
@@ -95,9 +88,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,0x3224,_,_,_,_,_,_,0x392b,_,_,_,_,_,_,0x4032,_,_,_,_,_,
_,0x4039}
},
// Quantization method 7, range 0..11
// QUANT_12, range 0..11
{
QUANT_12,
{0, 5, 11, 17, 23, 28, 36, 41, 47, 53, 59, 64},
{0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1},
{0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36},
@@ -106,9 +98,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
0x291c,_,_,_,_,0x2f24,_,_,_,_,_,0x3529,_,_,_,_,_,
0x3b2f,_,_,_,_,_,0x4035,_,_,_,_,0x403b}
},
// Quantization method 8, range 0..15
// QUANT_16, range 0..15
{
QUANT_16,
{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64},
@@ -117,9 +108,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,0x271d,_,_,_,0x2b23,_,_,_,0x2f27,_,_,_,0x342b,_,_,_,
_,0x382f,_,_,_,0x3c34,_,_,_,0x4038,_,_,_,0x403c}
},
// Quantization method 9, range 0..19
// QUANT_20, range 0..19
{
QUANT_20,
{0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 35, 38, 41, 45, 48, 51, 55, 58, 61, 64},
{0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1},
{0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35},
@@ -129,9 +119,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
0x2d26,_,_,_,0x3029,_,_,0x332d,_,_,0x3730,_,_,_,
0x3a33,_,_,0x3d37,_,_,0x403a,_,_,0x403d}
},
// Quantization method 10, range 0..23
// QUANT_24, range 0..23
{
QUANT_24,
{0, 2, 5, 8, 11, 13, 16, 19, 22, 24, 27, 30, 34, 37, 40, 42, 45, 48, 51, 53, 56, 59, 62, 64},
{0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19, 11, 3, 17, 9, 1},
{0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34},
@@ -142,9 +131,8 @@ const quant_and_transfer_table quant_and_xfer_tables[12] {
_,_,0x3530,_,0x3833,_,_,0x3b35,_,_,0x3e38,_,_,
0x403b,_,0x403e}
},
// Quantization method 11, range 0..31
// QUANT_32, range 0..31
{
QUANT_32,
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64},