ASTC encoding support

- Add 3rdparty/astc with lib version of the standard encoder
- Add astc_compress call for ASTC formats

- Add BIMG_CONFIG_ASTC_DECODE-gated decompression support. This is just for
  testing, the decompress code is currently too heavy to include in the core lib.
- Add fourcc support for DDS decode so ASTC and other formats not covered by
  dxgi can be read

- Add --formats option to texturec, lists all supported formats

- Update genie files -- add astc to bimg_encode and remove redundant files from
  texturec
This commit is contained in:
Andrew Willmott
2018-07-23 19:05:11 +01:00
parent 746f1053d7
commit 03ad3921ef
36 changed files with 40258 additions and 46 deletions

View File

@@ -0,0 +1,627 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Implements functions for finding dominant direction of a set of
* colors, using ARM patent pending method.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
#include <math.h>
#include "mathlib.h"
#ifdef DEBUG_CAPTURE_NAN
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <fenv.h>
#endif
/* routines to compute average colors and dominant directions for blocks with 3 and 4 components. */
/*
for a full block, functions to compute averages and dominant directions. The averages and directions are computed separately for each partition.
We have separate versions for blocks with and without alpha, since the processing for blocks with alpha is significantly more expensive.
The direction vectors it produces are NOT normalized.
*/
void compute_averages_and_directions_rgba(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb,
const float4 * color_scalefactors,
float4 * averages, float4 * directions_rgba, float3 * directions_gba, float3 * directions_rba, float3 * directions_rga, float3 * directions_rgb)
{
int i;
int partition_count = pt->partition_count;
int partition;
for (partition = 0; partition < partition_count; partition++)
{
const uint8_t *weights = pt->texels_of_partition[partition];
int texelcount = pt->texels_per_partition[partition];
float4 base_sum = float4(0, 0, 0, 0);
float partition_weight = 0.0f;
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = ewb->texel_weight[iwt];
float4 texel_datum = float4(blk->work_data[4 * iwt],
blk->work_data[4 * iwt + 1],
blk->work_data[4 * iwt + 2],
blk->work_data[4 * iwt + 3]) * weight;
partition_weight += weight;
base_sum = base_sum + texel_datum;
}
float4 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
averages[partition] = average * color_scalefactors[partition];
float4 sum_xp = float4(0, 0, 0, 0);
float4 sum_yp = float4(0, 0, 0, 0);
float4 sum_zp = float4(0, 0, 0, 0);
float4 sum_wp = float4(0, 0, 0, 0);
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = ewb->texel_weight[iwt];
float4 texel_datum = float4(blk->work_data[4 * iwt],
blk->work_data[4 * iwt + 1],
blk->work_data[4 * iwt + 2],
blk->work_data[4 * iwt + 3]);
texel_datum = (texel_datum - average) * weight;
if (texel_datum.x > 0.0f)
sum_xp = sum_xp + texel_datum;
if (texel_datum.y > 0.0f)
sum_yp = sum_yp + texel_datum;
if (texel_datum.z > 0.0f)
sum_zp = sum_zp + texel_datum;
if (texel_datum.w > 0.0f)
sum_wp = sum_wp + texel_datum;
}
float prod_xp = dot(sum_xp, sum_xp);
float prod_yp = dot(sum_yp, sum_yp);
float prod_zp = dot(sum_zp, sum_zp);
float prod_wp = dot(sum_wp, sum_wp);
float4 best_vector = sum_xp;
float best_sum = prod_xp;
if (prod_yp > best_sum)
{
best_vector = sum_yp;
best_sum = prod_yp;
}
if (prod_zp > best_sum)
{
best_vector = sum_zp;
best_sum = prod_zp;
}
if (prod_wp > best_sum)
{
best_vector = sum_wp;
best_sum = prod_wp;
}
directions_rgba[partition] = best_vector;
directions_rgb[partition] = best_vector.xyz;
directions_rga[partition] = best_vector.xyw;
directions_rba[partition] = best_vector.xzw;
directions_gba[partition] = best_vector.yzw;
}
}
void compute_averages_and_directions_rgb(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb,
const float4 * color_scalefactors, float3 * averages, float3 * directions_rgb, float2 * directions_rg, float2 * directions_rb, float2 * directions_gb)
{
int i;
int partition_count = pt->partition_count;
int partition;
const float *texel_weights = ewb->texel_weight_rgb;
for (partition = 0; partition < partition_count; partition++)
{
const uint8_t *weights = pt->texels_of_partition[partition];
int texelcount = pt->texels_per_partition[partition];
float3 base_sum = float3(0, 0, 0);
float partition_weight = 0.0f;
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = texel_weights[iwt];
float3 texel_datum = float3(blk->work_data[4 * iwt],
blk->work_data[4 * iwt + 1],
blk->work_data[4 * iwt + 2]) * weight;
partition_weight += weight;
base_sum = base_sum + texel_datum;
}
float4 csf = color_scalefactors[partition];
float3 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
averages[partition] = average * csf.xyz;
float3 sum_xp = float3(0, 0, 0);
float3 sum_yp = float3(0, 0, 0);
float3 sum_zp = float3(0, 0, 0);
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = texel_weights[iwt];
float3 texel_datum = float3(blk->work_data[4 * iwt],
blk->work_data[4 * iwt + 1],
blk->work_data[4 * iwt + 2]);
texel_datum = (texel_datum - average) * weight;
if (texel_datum.x > 0.0f)
sum_xp = sum_xp + texel_datum;
if (texel_datum.y > 0.0f)
sum_yp = sum_yp + texel_datum;
if (texel_datum.z > 0.0f)
sum_zp = sum_zp + texel_datum;
}
float prod_xp = dot(sum_xp, sum_xp);
float prod_yp = dot(sum_yp, sum_yp);
float prod_zp = dot(sum_zp, sum_zp);
float3 best_vector = sum_xp;
float best_sum = prod_xp;
if (prod_yp > best_sum)
{
best_vector = sum_yp;
best_sum = prod_yp;
}
if (prod_zp > best_sum)
{
best_vector = sum_zp;
best_sum = prod_zp;
}
directions_rgb[partition] = best_vector;
directions_gb[partition] = best_vector.yz;
directions_rb[partition] = best_vector.xz;
directions_rg[partition] = best_vector.xy;
}
}
void compute_averages_and_directions_3_components(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb,
const float3 * color_scalefactors, int component1, int component2, int component3, float3 * averages, float3 * directions)
{
int i;
int partition_count = pt->partition_count;
int partition;
const float *texel_weights;
if (component1 == 1 && component2 == 2 && component3 == 3)
texel_weights = ewb->texel_weight_gba;
else if (component1 == 0 && component2 == 2 && component3 == 3)
texel_weights = ewb->texel_weight_rba;
else if (component1 == 0 && component2 == 1 && component3 == 3)
texel_weights = ewb->texel_weight_rga;
else if (component1 == 0 && component2 == 1 && component3 == 2)
texel_weights = ewb->texel_weight_rgb;
else
{
texel_weights = ewb->texel_weight_gba;
ASTC_CODEC_INTERNAL_ERROR;
}
for (partition = 0; partition < partition_count; partition++)
{
const uint8_t *weights = pt->texels_of_partition[partition];
int texelcount = pt->texels_per_partition[partition];
float3 base_sum = float3(0, 0, 0);
float partition_weight = 0.0f;
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = texel_weights[iwt];
float3 texel_datum = float3(blk->work_data[4 * iwt + component1],
blk->work_data[4 * iwt + component2],
blk->work_data[4 * iwt + component3]) * weight;
partition_weight += weight;
base_sum = base_sum + texel_datum;
}
float3 csf = color_scalefactors[partition];
float3 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
averages[partition] = average * csf.xyz;
float3 sum_xp = float3(0, 0, 0);
float3 sum_yp = float3(0, 0, 0);
float3 sum_zp = float3(0, 0, 0);
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = texel_weights[iwt];
float3 texel_datum = float3(blk->work_data[4 * iwt + component1],
blk->work_data[4 * iwt + component2],
blk->work_data[4 * iwt + component3]);
texel_datum = (texel_datum - average) * weight;
if (texel_datum.x > 0.0f)
sum_xp = sum_xp + texel_datum;
if (texel_datum.y > 0.0f)
sum_yp = sum_yp + texel_datum;
if (texel_datum.z > 0.0f)
sum_zp = sum_zp + texel_datum;
}
float prod_xp = dot(sum_xp, sum_xp);
float prod_yp = dot(sum_yp, sum_yp);
float prod_zp = dot(sum_zp, sum_zp);
float3 best_vector = sum_xp;
float best_sum = prod_xp;
if (prod_yp > best_sum)
{
best_vector = sum_yp;
best_sum = prod_yp;
}
if (prod_zp > best_sum)
{
best_vector = sum_zp;
best_sum = prod_zp;
}
if (dot(best_vector, best_vector) < 1e-18)
best_vector = float3(1, 1, 1);
directions[partition] = best_vector;
}
}
void compute_averages_and_directions_2_components(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb, const float2 * color_scalefactors, int component1, int component2, float2 * averages, float2 * directions)
{
int i;
int partition_count = pt->partition_count;
int partition;
const float *texel_weights;
if (component1 == 0 && component2 == 1)
texel_weights = ewb->texel_weight_rg;
else if (component1 == 0 && component2 == 2)
texel_weights = ewb->texel_weight_rb;
else if (component1 == 1 && component2 == 2)
texel_weights = ewb->texel_weight_gb;
else
{
texel_weights = ewb->texel_weight_rg;
// unsupported set of color components.
ASTC_CODEC_INTERNAL_ERROR;
exit(1);
}
for (partition = 0; partition < partition_count; partition++)
{
const uint8_t *weights = pt->texels_of_partition[partition];
int texelcount = pt->texels_per_partition[partition];
float2 base_sum = float2(0, 0);
float partition_weight = 0.0f;
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = texel_weights[iwt];
float2 texel_datum = float2(blk->work_data[4 * iwt + component1],
blk->work_data[4 * iwt + component2]) * weight;
partition_weight += weight;
base_sum = base_sum + texel_datum;
}
float2 csf = color_scalefactors[partition];
float2 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
averages[partition] = average * csf.xy;
float2 sum_xp = float2(0, 0);
float2 sum_yp = float2(0, 0);
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float weight = texel_weights[iwt];
float2 texel_datum = float2(blk->work_data[4 * iwt + component1],
blk->work_data[4 * iwt + component2]);
texel_datum = (texel_datum - average) * weight;
if (texel_datum.x > 0.0f)
sum_xp = sum_xp + texel_datum;
if (texel_datum.y > 0.0f)
sum_yp = sum_yp + texel_datum;
}
float prod_xp = dot(sum_xp, sum_xp);
float prod_yp = dot(sum_yp, sum_yp);
float2 best_vector = sum_xp;
float best_sum = prod_xp;
if (prod_yp > best_sum)
{
best_vector = sum_yp;
best_sum = prod_yp;
}
directions[partition] = best_vector;
}
}
/* Two-level paste so that macro arguments are expanded before being glued together. */
#define XPASTE(x,y) x##y
#define PASTE(x,y) XPASTE(x,y)

/*
	TWO_COMPONENT_ERROR_FUNC(funcname, c0_iwt, c1_iwt, c01_name, c01_rname)

	Expands to the definition of

		float funcname(pt, blk, ewb, plines, length_of_lines)

	which, for every partition of pt, projects each texel's two selected
	components onto the line plines[partition] and
	  - adds the squared distance to the line, weighted per component by
	    ewb->error_weights, to the returned error sum;
	  - stores in length_of_lines[partition] the spread between the lowest and
	    highest line parameter seen, clamped to at least 1e-7.

	Macro arguments:
	  c0_iwt, c1_iwt  offsets (0..3) of the two components inside a texel's
	                  four work_data values;
	  c01_name        swizzle picking the same two components out of the float4
	                  error weight (x,y,z,w correspond to offsets 0,1,2,3);
	  c01_rname       suffix of the ewb->texel_weight_<suffix> table used to
	                  detect zero-weight texels.

	When ewb->contains_zeroweight_texels is set, texels whose combined weight is
	not above 1e-20 are skipped. That table is indexed by the texel id (iwt),
	like every other per-texel table, not by the texel's position in the
	partition's texel list.
*/
#define TWO_COMPONENT_ERROR_FUNC( funcname, c0_iwt, c1_iwt, c01_name, c01_rname ) \
float funcname( \
	const partition_info *pt, \
	const imageblock *blk, \
	const error_weight_block *ewb, \
	const processed_line2 *plines, \
	float *length_of_lines \
	) \
{ \
	int i; \
	float errorsum = 0.0f; \
	int partition; \
	for(partition=0; partition<pt->partition_count; partition++) \
	{ \
		const uint8_t *weights = pt->texels_of_partition[ partition ]; \
		int texelcount = pt->texels_per_partition[ partition ]; \
		float lowparam = 1e10f; \
		float highparam = -1e10f; \
		processed_line2 l = plines[partition]; \
		if( ewb->contains_zeroweight_texels ) \
		{ \
			for(i=0;i<texelcount;i++) \
			{ \
				int iwt = weights[i]; \
				float texel_weight = ewb-> PASTE(texel_weight_ , c01_rname) [iwt]; \
				if( texel_weight > 1e-20f ) \
				{ \
					float2 point = float2(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt] ); \
					float param = dot( point, l.bs ); \
					float2 rp1 = l.amod + param*l.bis; \
					float2 dist = rp1 - point; \
					float4 ews = ewb->error_weights[iwt]; \
					errorsum += dot( ews. c01_name, dist*dist ); \
					if( param < lowparam ) lowparam = param; \
					if( param > highparam ) highparam = param; \
				} \
			} \
		} \
		else \
		{ \
			for(i=0;i<texelcount;i++) \
			{ \
				int iwt = weights[i]; \
				float2 point = float2(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt] ); \
				float param = dot( point, l.bs ); \
				float2 rp1 = l.amod + param*l.bis; \
				float2 dist = rp1 - point; \
				float4 ews = ewb->error_weights[iwt]; \
				errorsum += dot( ews. c01_name, dist*dist ); \
				if( param < lowparam ) lowparam = param; \
				if( param > highparam ) highparam = param; \
			} \
		} \
		/* written as !(a > b) so that a NaN or an empty range also takes the clamp */ \
		float linelen = highparam - lowparam; \
		if( !(linelen > 1e-7f) ) \
			linelen = 1e-7f; \
		length_of_lines[partition] = linelen; \
	} \
	return errorsum; \
}

TWO_COMPONENT_ERROR_FUNC(compute_error_squared_rg, 0, 1, xy, rg)
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_rb, 0, 2, xz, rb)
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_gb, 1, 2, yz, gb)
/* red + alpha are offsets 0 and 3, so the matching error weights are .x and .w */
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_ra, 0, 3, xw, ra)
/*
	THREE_COMPONENT_ERROR_FUNC(funcname, c0_iwt, c1_iwt, c2_iwt, c012_name, c012_rname)

	Expands to the definition of

		float funcname(pt, blk, ewb, plines, length_of_lines)

	a function to compute the error across a tile when using a particular set
	of lines for a particular partitioning. For every partition of pt it
	projects each texel's three selected components onto plines[partition] and
	  - adds the squared distance to the line, weighted per component by
	    ewb->error_weights, to the returned error sum;
	  - stores in length_of_lines[partition] the length of the color-space
	    line, i.e. the spread between the lowest and highest line parameter
	    seen, clamped to at least 1e-7.

	Macro arguments:
	  c0_iwt..c2_iwt  offsets (0..3) of the three components inside a texel's
	                  four work_data values;
	  c012_name       swizzle picking the same three components out of the
	                  float4 error weight (x,y,z,w correspond to offsets 0,1,2,3);
	  c012_rname      suffix of the ewb->texel_weight_<suffix> table used to
	                  detect zero-weight texels.

	When ewb->contains_zeroweight_texels is set, texels whose combined weight is
	not above 1e-20 are skipped. That table is indexed by the texel id (iwt),
	like every other per-texel table, not by the texel's position in the
	partition's texel list.
*/
#define THREE_COMPONENT_ERROR_FUNC( funcname, c0_iwt, c1_iwt, c2_iwt, c012_name, c012_rname ) \
float funcname( \
	const partition_info *pt, \
	const imageblock *blk, \
	const error_weight_block *ewb, \
	const processed_line3 *plines, \
	float *length_of_lines \
	) \
{ \
	int i; \
	float errorsum = 0.0f; \
	int partition; \
	for(partition=0; partition<pt->partition_count; partition++) \
	{ \
		const uint8_t *weights = pt->texels_of_partition[ partition ]; \
		int texelcount = pt->texels_per_partition[ partition ]; \
		float lowparam = 1e10f; \
		float highparam = -1e10f; \
		processed_line3 l = plines[partition]; \
		if( ewb->contains_zeroweight_texels ) \
		{ \
			for(i=0;i<texelcount;i++) \
			{ \
				int iwt = weights[i]; \
				float texel_weight = ewb-> PASTE(texel_weight_ , c012_rname) [iwt]; \
				if( texel_weight > 1e-20f ) \
				{ \
					float3 point = float3(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt], blk->work_data[4*iwt + c2_iwt] ); \
					float param = dot( point, l.bs ); \
					float3 rp1 = l.amod + param*l.bis; \
					float3 dist = rp1 - point; \
					float4 ews = ewb->error_weights[iwt]; \
					errorsum += dot( ews. c012_name, dist*dist ); \
					if( param < lowparam ) lowparam = param; \
					if( param > highparam ) highparam = param; \
				} \
			} \
		} \
		else \
		{ \
			for(i=0;i<texelcount;i++) \
			{ \
				int iwt = weights[i]; \
				float3 point = float3(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt], blk->work_data[4*iwt + c2_iwt] ); \
				float param = dot( point, l.bs ); \
				float3 rp1 = l.amod + param*l.bis; \
				float3 dist = rp1 - point; \
				float4 ews = ewb->error_weights[iwt]; \
				errorsum += dot( ews. c012_name, dist*dist ); \
				if( param < lowparam ) lowparam = param; \
				if( param > highparam ) highparam = param; \
			} \
		} \
		/* written as !(a > b) so that a NaN or an empty range also takes the clamp */ \
		float linelen = highparam - lowparam; \
		if( !(linelen > 1e-7f) ) \
			linelen = 1e-7f; \
		length_of_lines[partition] = linelen; \
	} \
	return errorsum; \
}

THREE_COMPONENT_ERROR_FUNC(compute_error_squared_gba, 1, 2, 3, yzw, gba)
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_rba, 0, 2, 3, xzw, rba)
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_rga, 0, 1, 3, xyw, rga)
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_rgb, 0, 1, 2, xyz, rgb)
float compute_error_squared_rgba(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line4 * plines, float *length_of_lines)
{
int i;
float errorsum = 0.0f;
int partition;
for (partition = 0; partition < pt->partition_count; partition++)
{
const uint8_t *weights = pt->texels_of_partition[partition];
int texelcount = pt->texels_per_partition[partition];
float lowparam = 1e10;
float highparam = -1e10;
processed_line4 l = plines[partition];
if (ewb->contains_zeroweight_texels)
{
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
if (ewb->texel_weight[iwt] > 1e-20)
{
float4 point = float4(blk->work_data[4 * iwt], blk->work_data[4 * iwt + 1], blk->work_data[4 * iwt + 2], blk->work_data[4 * iwt + 3]);
float param = dot(point, l.bs);
float4 rp1 = l.amod + param * l.bis;
float4 dist = rp1 - point;
float4 ews = ewb->error_weights[iwt];
errorsum += dot(ews, dist * dist);
if (param < lowparam)
lowparam = param;
if (param > highparam)
highparam = param;
}
}
}
else
{
for (i = 0; i < texelcount; i++)
{
int iwt = weights[i];
float4 point = float4(blk->work_data[4 * iwt], blk->work_data[4 * iwt + 1], blk->work_data[4 * iwt + 2], blk->work_data[4 * iwt + 3]);
float param = dot(point, l.bs);
float4 rp1 = l.amod + param * l.bis;
float4 dist = rp1 - point;
float4 ews = ewb->error_weights[iwt];
errorsum += dot(ews, dist * dist);
if (param < lowparam)
lowparam = param;
if (param > highparam)
highparam = param;
}
}
float linelen = highparam - lowparam;
if (!(linelen > 1e-7f))
linelen = 1e-7f;
length_of_lines[partition] = linelen;
}
return errorsum;
}
// function to compute the error across a tile when using a particular line for
// a particular partition.
float compute_error_squared_rgb_single_partition(int partition_to_test, int xdim, int ydim, int zdim, const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * lin // the line for the partition.
)
{
int i;
int texels_per_block = xdim * ydim * zdim;
float errorsum = 0.0f;
for (i = 0; i < texels_per_block; i++)
{
int partition = pt->partition_of_texel[i];
float texel_weight = ewb->texel_weight_rgb[i];
if (partition != partition_to_test || texel_weight < 1e-20)
continue;
float3 point = float3(blk->work_data[4 * i], blk->work_data[4 * i + 1], blk->work_data[4 * i + 2]);
float param = dot(point, lin->bs);
float3 rp1 = lin->amod + param * lin->bis;
float3 dist = rp1 - point;
float4 ews = ewb->error_weights[i];
errorsum += dot(ews.xyz, dist * dist);
}
return errorsum;
}

977
3rdparty/astc/astc_block_sizes2.cpp vendored Normal file
View File

@@ -0,0 +1,977 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief For ASTC, generate the block size descriptor and the associated
* decimation tables.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
// Percentile tables, one per 2D block footprint <x>x<y> with each dimension
// taken from {4, 5, 6, 8, 10, 12}. They are only declared here; the 2048-entry
// arrays themselves are defined elsewhere.
extern const float percentile_table_4x4[2048];
extern const float percentile_table_4x5[2048];
extern const float percentile_table_4x6[2048];
extern const float percentile_table_4x8[2048];
extern const float percentile_table_4x10[2048];
extern const float percentile_table_4x12[2048];
extern const float percentile_table_5x4[2048];
extern const float percentile_table_5x5[2048];
extern const float percentile_table_5x6[2048];
extern const float percentile_table_5x8[2048];
extern const float percentile_table_5x10[2048];
extern const float percentile_table_5x12[2048];
extern const float percentile_table_6x4[2048];
extern const float percentile_table_6x5[2048];
extern const float percentile_table_6x6[2048];
extern const float percentile_table_6x8[2048];
extern const float percentile_table_6x10[2048];
extern const float percentile_table_6x12[2048];
extern const float percentile_table_8x4[2048];
extern const float percentile_table_8x5[2048];
extern const float percentile_table_8x6[2048];
extern const float percentile_table_8x8[2048];
extern const float percentile_table_8x10[2048];
extern const float percentile_table_8x12[2048];
extern const float percentile_table_10x4[2048];
extern const float percentile_table_10x5[2048];
extern const float percentile_table_10x6[2048];
extern const float percentile_table_10x8[2048];
extern const float percentile_table_10x10[2048];
extern const float percentile_table_10x12[2048];
extern const float percentile_table_12x4[2048];
extern const float percentile_table_12x5[2048];
extern const float percentile_table_12x6[2048];
extern const float percentile_table_12x8[2048];
extern const float percentile_table_12x10[2048];
extern const float percentile_table_12x12[2048];
const float *get_2d_percentile_table(int blockdim_x, int blockdim_y)
{
switch (blockdim_x)
{
case 4:
switch (blockdim_y)
{
case 4:
return percentile_table_4x4;
case 5:
return percentile_table_4x5;
case 6:
return percentile_table_4x6;
case 8:
return percentile_table_4x8;
case 10:
return percentile_table_4x10;
case 12:
return percentile_table_4x12;
}
break;
case 5:
switch (blockdim_y)
{
case 4:
return percentile_table_5x4;
case 5:
return percentile_table_5x5;
case 6:
return percentile_table_5x6;
case 8:
return percentile_table_5x8;
case 10:
return percentile_table_5x10;
case 12:
return percentile_table_5x12;
}
break;
case 6:
switch (blockdim_y)
{
case 4:
return percentile_table_6x4;
case 5:
return percentile_table_6x5;
case 6:
return percentile_table_6x6;
case 8:
return percentile_table_6x8;
case 10:
return percentile_table_6x10;
case 12:
return percentile_table_6x12;
}
break;
case 8:
switch (blockdim_y)
{
case 4:
return percentile_table_8x4;
case 5:
return percentile_table_8x5;
case 6:
return percentile_table_8x6;
case 8:
return percentile_table_8x8;
case 10:
return percentile_table_8x10;
case 12:
return percentile_table_8x12;
}
break;
case 10:
switch (blockdim_y)
{
case 4:
return percentile_table_10x4;
case 5:
return percentile_table_10x5;
case 6:
return percentile_table_10x6;
case 8:
return percentile_table_10x8;
case 10:
return percentile_table_10x10;
case 12:
return percentile_table_10x12;
}
break;
case 12:
switch (blockdim_y)
{
case 4:
return percentile_table_12x4;
case 5:
return percentile_table_12x5;
case 6:
return percentile_table_12x6;
case 8:
return percentile_table_12x8;
case 10:
return percentile_table_12x10;
case 12:
return percentile_table_12x12;
}
break;
default:
break;
}
return NULL; // should never happen.
}
// 3D percentile tables are stubbed for the time being: every 3D block size
// shares this single all-zero placeholder table.
static const float dummy_percentile_table_3d[2048] = { 0 };

// Returns the percentile table for a 3D block. The block dimensions are
// accepted for interface symmetry with the 2D lookup but are not consulted.
const float *get_3d_percentile_table(int blockdim_x, int blockdim_y, int blockdim_z)
{
	// mark the parameters as intentionally unused
	IGNORE(blockdim_z);
	IGNORE(blockdim_y);
	IGNORE(blockdim_x);

	return dummy_percentile_table_3d;
}
/*
	Decodes the bit fields of a 2D block mode.

	On success returns 1 and writes:
	  *Nval, *Mval         the two weight-grid dimensions
	  *dual_weight_plane   1 when two weight planes are used, else 0
	  *quant_mode          the weight quantization mode

	Returns 0, leaving the outputs untouched, when the bit pattern is not a
	usable mode or when the resulting weight count / weight bit count falls
	outside the MAX_WEIGHTS_PER_BLOCK / MIN..MAX_WEIGHT_BITS_PER_BLOCK limits.
*/
static int decode_block_mode_2d(int blockmode, int *Nval, int *Mval, int *dual_weight_plane, int *quant_mode)
{
	// Fixed-position fields shared by both layouts.
	const int bits_0_1 = blockmode & 3;
	const int bits_2_3 = (blockmode >> 2) & 3;
	const int A = (blockmode >> 5) & 0x3;
	const int bits_7_8 = (blockmode >> 7) & 3;

	int base_quant_mode = (blockmode >> 4) & 1;
	int H = (blockmode >> 9) & 1;
	int D = (blockmode >> 10) & 1;
	int N = 0;
	int M = 0;

	if (bits_0_1 != 0)
	{
		// Layout 1: bits 0-1 are the upper quant-mode bits, bits 2-3 select
		// how A and B (bits 7-8) map onto the grid dimensions.
		base_quant_mode |= bits_0_1 << 1;
		int B = bits_7_8;

		if (bits_2_3 == 0)
		{
			N = B + 4;
			M = A + 2;
		}
		else if (bits_2_3 == 1)
		{
			N = B + 8;
			M = A + 2;
		}
		else if (bits_2_3 == 2)
		{
			N = A + 2;
			M = B + 8;
		}
		else
		{
			// Here bit 8 is a selector (0x100) and only bit 7 remains for B.
			B &= 1;
			if (blockmode & 0x100)
			{
				N = B + 2;
				M = A + 2;
			}
			else
			{
				N = A + 2;
				M = B + 6;
			}
		}
	}
	else
	{
		// Layout 2: bits 2-3 are the upper quant-mode bits and must not be
		// zero; bits 7-8 select the grid-dimension mapping.
		if (bits_2_3 == 0)
			return 0;
		base_quant_mode |= bits_2_3 << 1;

		const int B = (blockmode >> 9) & 3;

		if (bits_7_8 == 0)
		{
			N = 12;
			M = A + 2;
		}
		else if (bits_7_8 == 1)
		{
			N = A + 2;
			M = 12;
		}
		else if (bits_7_8 == 2)
		{
			// Bits 9-10 are consumed by B here, so D and H are forced to 0.
			N = A + 6;
			M = B + 6;
			D = 0;
			H = 0;
		}
		else
		{
			// Bits 5-6 (the A field) pick one of two fixed grids; values 2
			// and 3 are not valid modes.
			if (A == 0)
			{
				N = 6;
				M = 10;
			}
			else if (A == 1)
			{
				N = 10;
				M = 6;
			}
			else
			{
				return 0;
			}
		}
	}

	const int weight_count = N * M * (D + 1);
	const int qmode = (base_quant_mode - 2) + 6 * H;
	const int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);

	const bool too_many_weights = weight_count > MAX_WEIGHTS_PER_BLOCK;
	const bool bits_out_of_range = weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK;
	if (too_many_weights || bits_out_of_range)
		return 0;

	*Nval = N;
	*Mval = M;
	*dual_weight_plane = D;
	*quant_mode = qmode;
	return 1;
}
/*
	Decodes the bit fields of a 3D block mode.

	On success returns 1 and writes:
	  *Nval, *Mval, *Qval  the three weight-grid dimensions
	  *dual_weight_plane   1 when two weight planes are used, else 0
	  *quant_mode          the weight quantization mode

	Returns 0, leaving the outputs untouched, when the bit pattern is not a
	usable mode or when the resulting weight count / weight bit count falls
	outside the MAX_WEIGHTS_PER_BLOCK / MIN..MAX_WEIGHT_BITS_PER_BLOCK limits.
*/
static int decode_block_mode_3d(int blockmode, int *Nval, int *Mval, int *Qval, int *dual_weight_plane, int *quant_mode)
{
	// Fixed-position fields shared by both layouts.
	const int bits_0_1 = blockmode & 3;
	const int bits_2_3 = (blockmode >> 2) & 3;
	const int A = (blockmode >> 5) & 0x3;
	const int bits_7_8 = (blockmode >> 7) & 3;

	int base_quant_mode = (blockmode >> 4) & 1;
	int H = (blockmode >> 9) & 1;
	int D = (blockmode >> 10) & 1;
	int N = 0;
	int M = 0;
	int Q = 0;

	if (bits_0_1 != 0)
	{
		// Layout 1: bits 0-1 are the upper quant-mode bits; A, B (bits 7-8)
		// and C (bits 2-3) give the three dimensions directly.
		base_quant_mode |= bits_0_1 << 1;
		N = A + 2;
		M = bits_7_8 + 2;
		Q = bits_2_3 + 2;
	}
	else
	{
		// Layout 2: bits 2-3 are the upper quant-mode bits and must not be zero.
		if (bits_2_3 == 0)
			return 0;
		base_quant_mode |= bits_2_3 << 1;

		const int B = (blockmode >> 9) & 3;

		if (bits_7_8 != 3)
		{
			// Bits 9-10 are consumed by B here, so D and H are forced to 0.
			// One dimension is fixed at 6; bits 7-8 say which one.
			D = 0;
			H = 0;
			if (bits_7_8 == 0)
			{
				N = 6;
				M = B + 2;
				Q = A + 2;
			}
			else if (bits_7_8 == 1)
			{
				N = A + 2;
				M = 6;
				Q = B + 2;
			}
			else
			{
				N = A + 2;
				M = B + 2;
				Q = 6;
			}
		}
		else
		{
			// A 2x2x2 grid with one dimension raised to 6; bits 5-6 (the A
			// field) say which one, and the value 3 is not a valid mode.
			N = 2;
			M = 2;
			Q = 2;
			if (A == 0)
				N = 6;
			else if (A == 1)
				M = 6;
			else if (A == 2)
				Q = 6;
			else
				return 0;
		}
	}

	const int weight_count = N * M * Q * (D + 1);
	const int qmode = (base_quant_mode - 2) + 6 * H;
	const int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);

	const bool too_many_weights = weight_count > MAX_WEIGHTS_PER_BLOCK;
	const bool bits_out_of_range = weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK;
	if (too_many_weights || bits_out_of_range)
		return 0;

	*Nval = N;
	*Mval = M;
	*Qval = Q;
	*dual_weight_plane = D;
	*quant_mode = qmode;
	return 1;
}
/*
	Fills *dt with the mapping between the texels of an xdim x ydim block and
	the grid points of an x_weights x y_weights weight grid.

	Every texel is placed on the weight grid in 4-bit fixed point and receives
	up to four (grid point, weight) pairs by truncated-precision bilinear
	interpolation; the four weights of a texel always add up to 16. Pairs with
	a zero weight are dropped. The same pairs are also recorded in the reverse
	direction (for each grid point, the texels it contributes to).

	xdim and ydim are both used as (dim - 1) divisors, so they must not be 1.
*/
static void initialize_decimation_table_2d(
	// dimensions of the block
	int xdim, int ydim,
	// number of grid points in 2d weight grid
	int x_weights, int y_weights, decimation_table * dt)
{
	const int texels_per_block = xdim * ydim;
	const int weights_per_block = x_weights * y_weights;

	// texel -> contributing grid points
	int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
	int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
	int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];

	// grid point -> influenced texels
	int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
	int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
	int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];

	for (int g = 0; g < weights_per_block; g++)
		texelcount_of_weight[g] = 0;
	for (int t = 0; t < texels_per_block; t++)
		weightcount_of_texel[t] = 0;

	for (int y = 0; y < ydim; y++)
	{
		for (int x = 0; x < xdim; x++)
		{
			const int texel = y * xdim + x;

			// Texel position on the weight grid: upper bits are the integer
			// grid coordinate, low 4 bits the fraction.
			const int grid_x = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
			const int grid_y = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
			const int frac_x = grid_x & 0xF;
			const int frac_y = grid_y & 0xF;
			const int cell_x = grid_x >> 4;
			const int cell_y = grid_y >> 4;

			// The four corners of the enclosing grid cell, in the order
			// (0,0), (1,0), (0,1), (1,1).
			const int origin = cell_x + cell_y * x_weights;
			const int corner_index[4] = { origin, origin + 1, origin + x_weights, origin + x_weights + 1 };

			// truncated-precision bilinear interpolation.
			const int w11 = (frac_x * frac_y + 8) >> 4;
			const int corner_weight[4] = { 16 - frac_x - frac_y + w11, frac_x - w11, frac_y - w11, w11 };

			for (int c = 0; c < 4; c++)
			{
				const int wt = corner_weight[c];
				if (wt == 0)
					continue;

				const int g = corner_index[c];

				const int texel_slot = weightcount_of_texel[texel]++;
				grid_weights_of_texel[texel][texel_slot] = g;
				weights_of_texel[texel][texel_slot] = wt;

				const int grid_slot = texelcount_of_weight[g]++;
				texels_of_weight[g][grid_slot] = texel;
				texelweights_of_weight[g][grid_slot] = wt;
			}
		}
	}

	for (int t = 0; t < texels_per_block; t++)
	{
		const int used = weightcount_of_texel[t];
		dt->texel_num_weights[t] = used;

		// ensure that all 4 entries are actually initialized.
		// This allows a branch-free implementation of compute_value_of_texel_flt()
		for (int k = 0; k < 4; k++)
		{
			if (k < used)
			{
				dt->texel_weights_int[t][k] = weights_of_texel[t][k];
				dt->texel_weights_float[t][k] = static_cast < float >(weights_of_texel[t][k]) * (1.0f / TEXEL_WEIGHT_SUM);
				dt->texel_weights[t][k] = grid_weights_of_texel[t][k];
			}
			else
			{
				dt->texel_weights_int[t][k] = 0;
				dt->texel_weights_float[t][k] = 0.0f;
				dt->texel_weights[t][k] = 0;
			}
		}
	}

	for (int g = 0; g < weights_per_block; g++)
	{
		const int influenced = texelcount_of_weight[g];
		dt->weight_num_texels[g] = influenced;
		for (int k = 0; k < influenced; k++)
		{
			dt->weight_texel[g][k] = texels_of_weight[g][k];
			dt->weights_int[g][k] = texelweights_of_weight[g][k];
			dt->weights_flt[g][k] = static_cast < float >(texelweights_of_weight[g][k]);
		}
	}

	dt->num_texels = texels_per_block;
	dt->num_weights = weights_per_block;
}
/*
Fills *dt with the mapping between the texels of an xdim x ydim x zdim block
and the grid points of an x_weights x y_weights x z_weights weight grid.

Every texel is placed on the weight grid in 4-bit fixed point and receives up
to four (grid point, weight) pairs by simplex interpolation; the four weights
of a texel always add up to 16. Pairs with a zero weight are dropped. The same
pairs are also recorded in the reverse direction (for each grid point, the
texels it contributes to).

xdim, ydim and zdim are all used as (dim - 1) divisors, so none of them may be 1.
*/
static void initialize_decimation_table_3d(
// dimensions of the block
int xdim, int ydim, int zdim,
// number of grid points in 3d weight grid
int x_weights, int y_weights, int z_weights, decimation_table * dt)
{
int i, j;
int x, y, z;
int texels_per_block = xdim * ydim * zdim;
int weights_per_block = x_weights * y_weights * z_weights;
// texel -> contributing grid points (count, grid index, weight)
int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
// grid point -> influenced texels (count, texel index, weight)
int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
for (i = 0; i < weights_per_block; i++)
texelcount_of_weight[i] = 0;
for (i = 0; i < texels_per_block; i++)
weightcount_of_texel[i] = 0;
for (z = 0; z < zdim; z++)
for (y = 0; y < ydim; y++)
for (x = 0; x < xdim; x++)
{
int texel = (z * ydim + y) * xdim + x;
// texel position on the weight grid: upper bits are the integer grid
// coordinate, low 4 bits the fraction
int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
int z_weight = (((1024 + zdim / 2) / (zdim - 1)) * z * (z_weights - 1) + 32) >> 6;
int x_weight_frac = x_weight & 0xF;
int y_weight_frac = y_weight & 0xF;
int z_weight_frac = z_weight & 0xF;
int x_weight_int = x_weight >> 4;
int y_weight_int = y_weight >> 4;
int z_weight_int = z_weight >> 4;
int qweight[4];
int weight[4];
// qweight[0] is the cell's origin corner, qweight[3] the diagonally opposite
// corner; the two intermediate corners depend on the simplex chosen below.
qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
// simplex interpolation
int fs = x_weight_frac;
int ft = y_weight_frac;
int fp = z_weight_frac;
// 3-bit code built from pairwise comparisons of the fractions:
// bit 2 = (fs > ft), bit 1 = (ft > fp), bit 0 = (fs > fp).
int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
// grid index strides: 1 along x, N along y, NM along z
int N = x_weights;
int NM = x_weights * y_weights;
// s1/s2 are the strides of the first and second step from the origin
// corner; in every case w0 + w1 + w2 + w3 == 16.
int s1, s2, w0, w1, w2, w3;
switch (cas)
{
case 7:
s1 = 1;
s2 = N;
w0 = 16 - fs;
w1 = fs - ft;
w2 = ft - fp;
w3 = fp;
break;
case 3:
s1 = N;
s2 = 1;
w0 = 16 - ft;
w1 = ft - fs;
w2 = fs - fp;
w3 = fp;
break;
case 5:
s1 = 1;
s2 = NM;
w0 = 16 - fs;
w1 = fs - fp;
w2 = fp - ft;
w3 = ft;
break;
case 4:
s1 = NM;
s2 = 1;
w0 = 16 - fp;
w1 = fp - fs;
w2 = fs - ft;
w3 = ft;
break;
case 2:
s1 = N;
s2 = NM;
w0 = 16 - ft;
w1 = ft - fp;
w2 = fp - fs;
w3 = fs;
break;
case 0:
s1 = NM;
s2 = N;
w0 = 16 - fp;
w1 = fp - ft;
w2 = ft - fs;
w3 = fs;
break;
// codes 1 and 6 would require contradictory comparison results, so this
// branch only exists to keep every variable initialized; it mirrors case 0.
default:
s1 = NM;
s2 = N;
w0 = 16 - fp;
w1 = fp - ft;
w2 = ft - fs;
w3 = fs;
break;
}
qweight[1] = qweight[0] + s1;
qweight[2] = qweight[1] + s2;
weight[0] = w0;
weight[1] = w1;
weight[2] = w2;
weight[3] = w3;
/*
for(i=0;i<4;i++) weight[i] <<= 4; */
// record every non-zero contribution in both directions
for (i = 0; i < 4; i++)
if (weight[i] != 0)
{
grid_weights_of_texel[texel][weightcount_of_texel[texel]] = qweight[i];
weights_of_texel[texel][weightcount_of_texel[texel]] = weight[i];
weightcount_of_texel[texel]++;
texels_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = texel;
texelweights_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = weight[i];
texelcount_of_weight[qweight[i]]++;
}
}
// copy the texel -> grid point mapping into the table
for (i = 0; i < texels_per_block; i++)
{
dt->texel_num_weights[i] = weightcount_of_texel[i];
// ensure that all 4 entries are actually initialized.
// This allows a branch-free implementation of compute_value_of_texel_flt()
for (j = 0; j < 4; j++)
{
dt->texel_weights_int[i][j] = 0;
dt->texel_weights_float[i][j] = 0.0f;
dt->texel_weights[i][j] = 0;
}
for (j = 0; j < weightcount_of_texel[i]; j++)
{
dt->texel_weights_int[i][j] = weights_of_texel[i][j];
dt->texel_weights_float[i][j] = static_cast < float >(weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM);
dt->texel_weights[i][j] = grid_weights_of_texel[i][j];
}
}
// copy the grid point -> texel mapping into the table
for (i = 0; i < weights_per_block; i++)
{
dt->weight_num_texels[i] = texelcount_of_weight[i];
for (j = 0; j < texelcount_of_weight[i]; j++)
{
dt->weight_texel[i][j] = texels_of_weight[i][j];
dt->weights_int[i][j] = texelweights_of_weight[i][j];
dt->weights_flt[i][j] = static_cast < float >(texelweights_of_weight[i][j]);
}
}
dt->num_texels = texels_per_block;
dt->num_weights = weights_per_block;
}
void construct_block_size_descriptor_2d(int xdim, int ydim, block_size_descriptor * bsd)
{
int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index
int decimation_mode_count = 0;
int i;
int x_weights;
int y_weights;
for (i = 0; i < 256; i++)
{
decimation_mode_index[i] = -1;
}
// gather all the infill-modes that can be used with the current block size
for (x_weights = 2; x_weights <= 12; x_weights++)
for (y_weights = 2; y_weights <= 12; y_weights++)
{
if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
continue;
decimation_table *dt = new decimation_table;
decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count;
initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, dt);
int weight_count = x_weights * y_weights;
int maxprec_1plane = -1;
int maxprec_2planes = -1;
for (i = 0; i < 12; i++)
{
int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i);
int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i);
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
maxprec_1plane = i;
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
maxprec_2planes = i;
}
if (2 * x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
maxprec_2planes = -1;
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim);
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
bsd->decimation_tables[decimation_mode_count] = dt;
decimation_mode_count++;
}
for (i = 0; i < MAX_DECIMATION_MODES; i++)
{
bsd->decimation_mode_percentile[i] = 1.0f;
}
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++)
{
bsd->permit_encode[i] = 0;
bsd->decimation_mode_samples[i] = 0;
bsd->decimation_mode_maxprec_1plane[i] = -1;
bsd->decimation_mode_maxprec_2planes[i] = -1;
}
bsd->decimation_mode_count = decimation_mode_count;
const float *percentiles = get_2d_percentile_table(xdim, ydim);
// then construct the list of block formats
for (i = 0; i < 2048; i++)
{
int x_weights, y_weights;
int is_dual_plane;
int quantization_mode;
int fail = 0;
int permit_encode = 1;
if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode))
{
if (x_weights > xdim || y_weights > ydim)
permit_encode = 0;
}
else
{
fail = 1;
permit_encode = 0;
}
if (fail)
{
bsd->block_modes[i].decimation_mode = -1;
bsd->block_modes[i].quantization_mode = -1;
bsd->block_modes[i].is_dual_plane = -1;
bsd->block_modes[i].permit_encode = 0;
bsd->block_modes[i].permit_decode = 0;
bsd->block_modes[i].percentile = 1.0f;
}
else
{
int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
bsd->block_modes[i].decimation_mode = decimation_mode;
bsd->block_modes[i].quantization_mode = quantization_mode;
bsd->block_modes[i].is_dual_plane = is_dual_plane;
bsd->block_modes[i].permit_encode = permit_encode;
bsd->block_modes[i].permit_decode = permit_encode; // disallow decode of grid size larger than block size.
bsd->block_modes[i].percentile = percentiles[i];
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
}
}
if (xdim * ydim <= 64)
{
bsd->texelcount_for_bitmap_partitioning = xdim * ydim;
for (i = 0; i < xdim * ydim; i++)
bsd->texels_for_bitmap_partitioning[i] = i;
}
else
{
// pick 64 random texels for use with bitmap partitioning.
int arr[MAX_TEXELS_PER_BLOCK];
for (i = 0; i < xdim * ydim; i++)
arr[i] = 0;
int arr_elements_set = 0;
while (arr_elements_set < 64)
{
int idx = rand() % (xdim * ydim);
if (arr[idx] == 0)
{
arr_elements_set++;
arr[idx] = 1;
}
}
int texel_weights_written = 0;
int idx = 0;
while (texel_weights_written < 64)
{
if (arr[idx])
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
idx++;
}
bsd->texelcount_for_bitmap_partitioning = 64;
}
}
void construct_block_size_descriptor_3d(int xdim, int ydim, int zdim, block_size_descriptor * bsd)
{
int decimation_mode_index[512]; // for each of the 512 entries in the decim_table_array, its index
int decimation_mode_count = 0;
int i;
int x_weights;
int y_weights;
int z_weights;
for (i = 0; i < 512; i++)
{
decimation_mode_index[i] = -1;
}
// gather all the infill-modes that can be used with the current block size
for (x_weights = 2; x_weights <= 6; x_weights++)
for (y_weights = 2; y_weights <= 6; y_weights++)
for (z_weights = 2; z_weights <= 6; z_weights++)
{
if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
continue;
decimation_table *dt = new decimation_table;
decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt);
int weight_count = x_weights * y_weights * z_weights;
int maxprec_1plane = -1;
int maxprec_2planes = -1;
for (i = 0; i < 12; i++)
{
int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i);
int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i);
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
maxprec_1plane = i;
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
maxprec_2planes = i;
}
if ((2 * x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
maxprec_2planes = -1;
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim && z_weights <= zdim);
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
bsd->decimation_tables[decimation_mode_count] = dt;
decimation_mode_count++;
}
for (i = 0; i < MAX_DECIMATION_MODES; i++)
{
bsd->decimation_mode_percentile[i] = 1.0f;
}
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++)
{
bsd->permit_encode[i] = 0;
bsd->decimation_mode_samples[i] = 0;
bsd->decimation_mode_maxprec_1plane[i] = -1;
bsd->decimation_mode_maxprec_2planes[i] = -1;
}
bsd->decimation_mode_count = decimation_mode_count;
const float *percentiles = get_3d_percentile_table(xdim, ydim, zdim);
// then construct the list of block formats
for (i = 0; i < 2048; i++)
{
int x_weights, y_weights, z_weights;
int is_dual_plane;
int quantization_mode;
int fail = 0;
int permit_encode = 1;
if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode))
{
if (x_weights > xdim || y_weights > ydim || z_weights > zdim)
permit_encode = 0;
}
else
{
fail = 1;
permit_encode = 0;
}
if (fail)
{
bsd->block_modes[i].decimation_mode = -1;
bsd->block_modes[i].quantization_mode = -1;
bsd->block_modes[i].is_dual_plane = -1;
bsd->block_modes[i].permit_encode = 0;
bsd->block_modes[i].permit_decode = 0;
bsd->block_modes[i].percentile = 1.0f;
}
else
{
int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
bsd->block_modes[i].decimation_mode = decimation_mode;
bsd->block_modes[i].quantization_mode = quantization_mode;
bsd->block_modes[i].is_dual_plane = is_dual_plane;
bsd->block_modes[i].permit_encode = permit_encode;
bsd->block_modes[i].permit_decode = permit_encode;
bsd->block_modes[i].percentile = percentiles[i];
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
}
}
if (xdim * ydim * zdim <= 64)
{
bsd->texelcount_for_bitmap_partitioning = xdim * ydim * zdim;
for (i = 0; i < xdim * ydim * zdim; i++)
bsd->texels_for_bitmap_partitioning[i] = i;
}
else
{
// pick 64 random texels for use with bitmap partitioning.
int arr[MAX_TEXELS_PER_BLOCK];
for (i = 0; i < xdim * ydim * zdim; i++)
arr[i] = 0;
int arr_elements_set = 0;
while (arr_elements_set < 64)
{
int idx = rand() % (xdim * ydim * zdim);
if (arr[idx] == 0)
{
arr_elements_set++;
arr[idx] = 1;
}
}
int texel_weights_written = 0;
int idx = 0;
while (texel_weights_written < 64)
{
if (arr[idx])
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
idx++;
}
bsd->texelcount_for_bitmap_partitioning = 64;
}
}
// Cache of block size descriptors, one slot per packed (xdim, ydim, zdim) key.
// Slots start out NULL (static storage) and are filled on first request.
static block_size_descriptor *bsd_pointers[4096];

// Return the block size descriptor for the given block dimensions, building and
// caching it on first use. The cache is updated without any locking, so this
// should not be called from within multi-threaded code.
const block_size_descriptor *get_block_size_descriptor(int xdim, int ydim, int zdim)
{
	// pack the three dimensions into one cache key, 4 bits per dimension.
	const int slot = (zdim << 8) + (ydim << 4) + xdim;

	if (bsd_pointers[slot] != NULL)
		return bsd_pointers[slot];

	block_size_descriptor *fresh = new block_size_descriptor;
	if (zdim > 1)
	{
		construct_block_size_descriptor_3d(xdim, ydim, zdim, fresh);
	}
	else
	{
		construct_block_size_descriptor_2d(xdim, ydim, fresh);
	}

	bsd_pointers[slot] = fresh;
	return bsd_pointers[slot];
}

815
3rdparty/astc/astc_codec_internals.h vendored Normal file
View File

@@ -0,0 +1,815 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012, 2018 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Internal function and data declarations for ASTC codec.
*/
/*----------------------------------------------------------------------------*/
#ifndef ASTC_CODEC_INTERNALS_INCLUDED
#define ASTC_CODEC_INTERNALS_INCLUDED
#include <stdint.h>
#include <stdlib.h>
#include "mathlib.h"
// Two-argument minimum / maximum. Only defined when the including code has not
// already provided them. Both are plain macros: each argument may be evaluated
// twice, so do not pass expressions with side effects.
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
// Macro to silence warnings on ignored parameters.
// The presence of this macro should be a signal to look at refactoring.
#define IGNORE(param) ((void)&param)
// NaN test based on self-comparison: a floating-point NaN is the only value
// that compares unequal to itself. The argument is evaluated twice.
#define astc_isnan(p) ((p)!=(p))
// ASTC parameters
// capacity of every per-texel array in this header; 216 = 6*6*6
// (NOTE(review): presumably the largest 3D block footprint -- confirm).
#define MAX_TEXELS_PER_BLOCK 216
// upper limit on the number of weights in a weight grid; grids with more
// weights are rejected when block size descriptors are constructed.
#define MAX_WEIGHTS_PER_BLOCK 64
// accepted range for the bit count returned by compute_ise_bitcount() for a
// block's weights; quantization levels outside this range are not usable.
#define MIN_WEIGHT_BITS_PER_BLOCK 24
#define MAX_WEIGHT_BITS_PER_BLOCK 96
#define PARTITION_BITS 10
#define PARTITION_COUNT (1 << PARTITION_BITS)
// the sum of weights for one texel.
#define TEXEL_WEIGHT_SUM 16
// capacity of the per-decimation-mode arrays in block_size_descriptor.
#define MAX_DECIMATION_MODES 87
// number of block-mode entries in block_size_descriptor::block_modes.
#define MAX_WEIGHT_MODES 2048
// error reporting for codec internal errors.
// ASTC_CODEC_INTERNAL_ERROR passes the current source file and line number
// to astc_codec_internal_error().
#define ASTC_CODEC_INTERNAL_ERROR astc_codec_internal_error(__FILE__, __LINE__)
void astc_codec_internal_error(const char *filename, int linenumber);
// uncomment this macro to enable checking for inappropriate NaNs;
// works on Linux only, and slows down encoding significantly.
// #define DEBUG_CAPTURE_NAN
// the DEBUG_PRINT_DIAGNOSTICS macro enables the -diag command line switch,
// which can be used to look for codec bugs
#define DEBUG_PRINT_DIAGNOSTICS
#ifdef DEBUG_PRINT_DIAGNOSTICS
// only declared when DEBUG_PRINT_DIAGNOSTICS is defined.
extern int print_diagnostics;
#endif
// global codec option flags; they are only declared here and are defined in
// another translation unit.
extern int print_tile_errors;
extern int print_statistics;
extern int perform_srgb_transform;
extern int rgb_force_use_of_hdr;
extern int alpha_force_use_of_hdr;
// Line description for 2-component color data. One entry per partition is
// passed as the `plines` argument of compute_error_squared_rg/rb/gb/ra.
// NOTE(review): the exact meaning of amod, bs and bis is not visible in this
// header; see the code that fills these structures.
struct processed_line2
{
float2 amod;
float2 bs;
float2 bis;
};
// Line description for 3-component color data. Passed as the `plines` argument
// of compute_error_squared_gba/rba/rga/rgb and as the `lin` argument of
// compute_error_squared_rgb_single_partition.
// NOTE(review): the exact meaning of amod, bs and bis is not visible in this
// header; see the code that fills these structures.
struct processed_line3
{
float3 amod;
float3 bs;
float3 bis;
};
// Line description for 4-component (RGBA) color data. One entry per partition
// is passed as the `lines` argument of compute_error_squared_rgba.
// NOTE(review): the exact meaning of amod, bs and bis is not visible in this
// header; see the code that fills these structures.
struct processed_line4
{
float4 amod;
float4 bs;
float4 bis;
};
// Decode mode selector: LDR with sRGB, plain LDR, or HDR. It is taken as the
// first argument of pack_color_endpoints() and unpack_color_endpoints().
enum astc_decode_mode
{
DECODE_LDR_SRGB,
DECODE_LDR,
DECODE_HDR
};
/*
Partition table representation:
For each block size, we have 3 tables, each with 1024 partitionings;
these three tables correspond to 2, 3 and 4 partitions respectively.
For each partitioning, we have:
* a 4-entry table indicating how many texels there are in each of the 4 partitions.
  A partition may be empty (count 0); counts are stored as uint8_t.
* a table indicating the partition index of each of the texels in the block.
  Each element is an uint8_t holding the partition index (0, 1, 2 or 3).
*/
struct partition_info
{
// number of partitions in this partitioning.
int partition_count;
// texel count of each partition.
uint8_t texels_per_partition[4];
// partition index (0..3) of each texel.
uint8_t partition_of_texel[MAX_TEXELS_PER_BLOCK];
// NOTE(review): presumably the texel indices belonging to each partition,
// with texels_per_partition[p] valid entries in row p -- confirm against
// the code that builds the partition tables.
uint8_t texels_of_partition[4][MAX_TEXELS_PER_BLOCK];
uint64_t coverage_bitmaps[4]; // used for the purposes of k-means partition search.
};
/*
In ASTC, we don't necessarily provide a weight for every texel.
As such, for each block size, there are a number of patterns where some texels
have their weights computed as a weighted average of more than 1 weight.
As such, the codec uses a data structure that tells us:
* for each texel, which weights it is a combination of;
* for each weight, which texels it contributes to.
The decimation_table is this data structure.
*/
struct decimation_table
{
// number of texels in the block and number of weights in the weight grid.
int num_texels;
int num_weights;
// number of grid weights (at most 4) that go into the calculation for a texel.
uint8_t texel_num_weights[MAX_TEXELS_PER_BLOCK];
// integer contribution factor of each of those grid weights; unused entries are 0.
uint8_t texel_weights_int[MAX_TEXELS_PER_BLOCK][4];
// the same factors multiplied by 1/TEXEL_WEIGHT_SUM; unused entries are 0.
float texel_weights_float[MAX_TEXELS_PER_BLOCK][4];
// indices of the grid weights that go into a texel calculation; unused entries are 0.
uint8_t texel_weights[MAX_TEXELS_PER_BLOCK][4];
// the number of texels that a given weight contributes to.
uint8_t weight_num_texels[MAX_WEIGHTS_PER_BLOCK];
// the texels that the weight contributes to.
uint8_t weight_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
// integer contribution factor of the weight for each of those texels.
uint8_t weights_int[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
// the same factors converted to float (not divided by TEXEL_WEIGHT_SUM).
float weights_flt[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
};
/*
data structures describing information that pertains to a block size and its associated block modes.
block_mode holds the decoded properties of one block-mode encoding;
block_size_descriptor holds one block_mode for each encoding.
*/
struct block_mode
{
// index of the decimation mode (weight grid) used by this block mode; -1 for an invalid mode.
int8_t decimation_mode;
// weight quantization mode reported by the block-mode decoder; -1 for an invalid mode.
int8_t quantization_mode;
// dual-plane flag reported by the block-mode decoder; -1 for an invalid mode.
int8_t is_dual_plane;
// 1 if the mode may be used; 0 for invalid modes and for weight grids larger than the block.
int8_t permit_encode;
// set to the same value as permit_encode when the descriptor is constructed.
int8_t permit_decode;
// value from the block size's percentile table; 1.0 for invalid modes.
float percentile;
};
// Per-block-size lookup data. Instances are created and cached by
// get_block_size_descriptor().
struct block_size_descriptor
{
// number of decimation modes in use; only this many leading entries of the
// per-decimation-mode arrays below describe a real weight grid.
int decimation_mode_count;
// number of weights in the weight grid of each decimation mode.
int decimation_mode_samples[MAX_DECIMATION_MODES];
// highest usable weight quantization level with 1 and 2 weight planes; -1 if none.
int decimation_mode_maxprec_1plane[MAX_DECIMATION_MODES];
int decimation_mode_maxprec_2planes[MAX_DECIMATION_MODES];
// smallest percentile of any block mode that uses the decimation mode; 1.0 if none does.
float decimation_mode_percentile[MAX_DECIMATION_MODES];
// nonzero if the weight grid is no larger than the block in any dimension.
int permit_encode[MAX_DECIMATION_MODES];
// decimation table of each decimation mode.
const decimation_table *decimation_tables[MAX_DECIMATION_MODES + 1];
// one entry for each block-mode encoding.
block_mode block_modes[MAX_WEIGHT_MODES];
// for the k-means based bitmap partitioning algorithm, we don't
// want to consider more than 64 texels; this array specifies
// which 64 texels (if that many) to consider.
int texelcount_for_bitmap_partitioning;
int texels_for_bitmap_partitioning[64];
};
// data structure representing one block of an image.
// it is expanded to float prior to processing to save some computation time
// on conversions to/from uint8_t (this also allows us to handle HDR textures easily)
// The three float arrays hold 4 values per texel.
struct imageblock
{
float orig_data[MAX_TEXELS_PER_BLOCK * 4]; // original input data
float work_data[MAX_TEXELS_PER_BLOCK * 4]; // the data that we will compress, either linear or LNS (0..65535 in both cases)
float deriv_data[MAX_TEXELS_PER_BLOCK * 4]; // derivative of the conversion function used, used to modify error weighting
uint8_t rgb_lns[MAX_TEXELS_PER_BLOCK]; // 1 if RGB data are being treated as LNS
uint8_t alpha_lns[MAX_TEXELS_PER_BLOCK]; // 1 if Alpha data are being treated as LNS
uint8_t nan_texel[MAX_TEXELS_PER_BLOCK]; // 1 if the texel is a NaN-texel.
// per-component minimum and maximum values.
// NOTE(review): presumably computed over the block's texels by update_imageblock_flags() -- confirm.
float red_min, red_max;
float green_min, green_max;
float blue_min, blue_max;
float alpha_min, alpha_max;
int grayscale; // 1 if R=G=B for every pixel, 0 otherwise
// NOTE(review): presumably the position of the block within the image -- confirm.
int xpos, ypos, zpos;
};
// Tunable parameters for error weighting, followed by limits for the
// heuristic codec speedups.
// NOTE(review): how each field is consumed is not visible in this header;
// the field names are the only description available here.
struct error_weighting_params
{
float rgb_power;
float rgb_base_weight;
float rgb_mean_weight;
float rgb_stdev_weight;
float alpha_power;
float alpha_base_weight;
float alpha_mean_weight;
float alpha_stdev_weight;
float rgb_mean_and_stdev_mixing;
int mean_stdev_radius;
int enable_rgb_scale_with_alpha;
int alpha_radius;
int ra_normal_angular_scale;
float block_artifact_suppression;
// one value per color component.
float rgba_weights[4];
// one value per texel of a block.
float block_artifact_suppression_expanded[MAX_TEXELS_PER_BLOCK];
// parameters that deal with heuristic codec speedups
int partition_search_limit;
float block_mode_cutoff;
float texel_avg_error_limit;
float partition_1_to_2_limit;
float lowest_correlation_cutoff;
int max_refinement_iters;
};
void update_imageblock_flags(imageblock * pb, int xdim, int ydim, int zdim);
void imageblock_initialize_orig_from_work(imageblock * pb, int pixelcount);
void imageblock_initialize_work_from_orig(imageblock * pb, int pixelcount);
/*
Data structure representing error weighting for one block of an image. This is used as
a multiplier for the error weight to apply to each color component when computing PSNR.
This weighting has several uses:
* it's usable for RA, GA, BA, A weighting, which is useful for alpha-textures;
* it's usable for HDR textures, where weighting should be approximately inverse to
  luminance;
* it's usable for perceptual weighting, where we assign higher weight to low-variability
  regions than to high-variability regions;
* it's usable for suppressing off-edge block content in case the texture doesn't
  actually extend to the edge of the block.
For the default case (everything is evenly weighted), every weight is 1. For the RA,GA,BA,A case,
we multiply the R,G,B weights with that of the alpha.
Putting the same weight in every component should result in the default case.
The following relations should hold:
texel_weight_rg[i] = (texel_weight_r[i] + texel_weight_g[i]) / 2
texel_weight_lum[i] = (texel_weight_r[i] + texel_weight_g[i] + texel_weight_b[i]) / 3
texel_weight[i] = (texel_weight_r[i] + texel_weight_g[i] + texel_weight_b[i] + texel_weight_a[i]) / 4
*/
struct error_weight_block
{
// per-texel error weights, one float4 per texel.
float4 error_weights[MAX_TEXELS_PER_BLOCK];
// per-texel weight across all four components (see the relations above).
float texel_weight[MAX_TEXELS_PER_BLOCK];
// per-texel weights for component subsets; the suffix names the components included.
float texel_weight_gba[MAX_TEXELS_PER_BLOCK];
float texel_weight_rba[MAX_TEXELS_PER_BLOCK];
float texel_weight_rga[MAX_TEXELS_PER_BLOCK];
float texel_weight_rgb[MAX_TEXELS_PER_BLOCK];
float texel_weight_rg[MAX_TEXELS_PER_BLOCK];
float texel_weight_rb[MAX_TEXELS_PER_BLOCK];
float texel_weight_gb[MAX_TEXELS_PER_BLOCK];
float texel_weight_ra[MAX_TEXELS_PER_BLOCK];
float texel_weight_r[MAX_TEXELS_PER_BLOCK];
float texel_weight_g[MAX_TEXELS_PER_BLOCK];
float texel_weight_b[MAX_TEXELS_PER_BLOCK];
float texel_weight_a[MAX_TEXELS_PER_BLOCK];
// NOTE(review): presumably nonzero when at least one texel has a zero error weight -- confirm.
int contains_zeroweight_texels;
};
// Holds only the per-texel float4 error weights, without the derived
// per-component-subset weights kept in error_weight_block.
struct error_weight_block_orig
{
float4 error_weights[MAX_TEXELS_PER_BLOCK];
};
// enumeration of all the quantization methods we support under this format.
// Each name carries the number of quantization levels; the enumerator values
// are consecutive indices from 0 to 20.
// quant_and_xfer_tables has 12 entries, which matches the first 12 methods
// (QUANT_2 .. QUANT_32); the color (un)quantization tables have one row for
// each of the 21 methods.
enum quantization_method
{
QUANT_2 = 0,
QUANT_3 = 1,
QUANT_4 = 2,
QUANT_5 = 3,
QUANT_6 = 4,
QUANT_8 = 5,
QUANT_10 = 6,
QUANT_12 = 7,
QUANT_16 = 8,
QUANT_20 = 9,
QUANT_24 = 10,
QUANT_32 = 11,
QUANT_40 = 12,
QUANT_48 = 13,
QUANT_64 = 14,
QUANT_80 = 15,
QUANT_96 = 16,
QUANT_128 = 17,
QUANT_160 = 18,
QUANT_192 = 19,
QUANT_256 = 20
};
/*
In ASTC, we support relatively many combinations of weight precisions and weight transfer functions.
As such, for each combination we support, we have a hardwired data structure.
This structure provides the following information:
* a table, used to estimate the closest quantized weight for a given floating-point weight;
* for each quantized weight, the corresponding unquantized and floating-point values;
* for each quantized weight, a previous-value and a next-value.
*/
struct quantization_and_transfer_table
{
// the quantization method this table describes.
quantization_method method;
// the per-quantized-weight arrays have room for 32 quantized weight values.
uint8_t unquantized_value[32]; // 0..64
float unquantized_value_flt[32]; // 0..1
uint8_t prev_quantized_value[32];
uint8_t next_quantized_value[32];
// lookup with 1025 entries used to estimate the closest quantized weight.
// NOTE(review): presumably indexed by the floating-point weight scaled to 0..1024 -- confirm.
uint8_t closest_quantized_weight[1025];
};
extern const quantization_and_transfer_table quant_and_xfer_tables[12];
// Color endpoint format identifiers, with explicit values 0 to 15.
// The names distinguish luminance / luminance+alpha / RGB / RGBA data,
// direct vs. DELTA and SCALE variants, and LDR vs. HDR variants.
enum endpoint_formats
{
FMT_LUMINANCE = 0,
FMT_LUMINANCE_DELTA = 1,
FMT_HDR_LUMINANCE_LARGE_RANGE = 2,
FMT_HDR_LUMINANCE_SMALL_RANGE = 3,
FMT_LUMINANCE_ALPHA = 4,
FMT_LUMINANCE_ALPHA_DELTA = 5,
FMT_RGB_SCALE = 6,
FMT_HDR_RGB_SCALE = 7,
FMT_RGB = 8,
FMT_RGB_DELTA = 9,
FMT_RGB_SCALE_ALPHA = 10,
FMT_HDR_RGB = 11,
FMT_RGBA = 12,
FMT_RGBA_DELTA = 13,
FMT_HDR_RGB_LDR_ALPHA = 14,
FMT_HDR_RGBA = 15,
};
// Unpacked ("symbolic") form of one compressed block: the block mode,
// partitioning, endpoint colors and weights are held as separate fields.
// physical_compressed_block is the 16-byte packed counterpart.
struct symbolic_compressed_block
{
int error_block; // 1 marks error block, 0 marks non-error-block.
int block_mode; // 0 to 2047. Negative value marks constant-color block (-1: FP16, -2:UINT16)
int partition_count; // 1 to 4; Zero marks a constant-color block.
int partition_index; // 0 to 1023
int color_formats[4]; // color format for each endpoint color pair.
int color_formats_matched; // color format for all endpoint pairs are matched.
int color_values[4][12]; // quantized endpoint color pairs.
// NOTE(review): presumably the quantization level used for color_values -- confirm.
int color_quantization_level;
uint8_t plane1_weights[MAX_WEIGHTS_PER_BLOCK]; // quantized and decimated weights
// weights of the second plane; see plane2_color_component.
uint8_t plane2_weights[MAX_WEIGHTS_PER_BLOCK];
int plane2_color_component; // color component for the secondary plane of weights
int constant_color[4]; // constant-color, as FP16 or UINT16. Used for constant-color blocks only.
};
// One compressed block in its packed form: 16 bytes (128 bits) of raw data.
struct physical_compressed_block
{
uint8_t data[16];
};
const block_size_descriptor *get_block_size_descriptor(int xdim, int ydim, int zdim);
// ***********************************************************
// functions and data pertaining to quantization and encoding
// **********************************************************
extern const uint8_t color_quantization_tables[21][256];
extern const uint8_t color_unquantization_tables[21][256];
void encode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset);
void decode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset);
int compute_ise_bitcount(int items, quantization_method quant);
void build_quantization_mode_table(void);
extern int quantization_mode_table[17][128];
// **********************************************
// functions and data pertaining to partitioning
// **********************************************
// function to get a pointer to a partition table or an array thereof.
const partition_info *get_partition_table(int xdim, int ydim, int zdim, int partition_count);
// functions to compute color averages and dominant directions
// for each partition in a block
void compute_averages_and_directions_rgb(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb,
const float4 * color_scalefactors, float3 * averages, float3 * directions_rgb, float2 * directions_rg, float2 * directions_rb, float2 * directions_gb);
void compute_averages_and_directions_rgba(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb,
const float4 * color_scalefactors,
float4 * averages, float4 * directions_rgba, float3 * directions_gba, float3 * directions_rba, float3 * directions_rga, float3 * directions_rgb);
void compute_averages_and_directions_3_components(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb,
const float3 * color_scalefactors, int component1, int component2, int component3, float3 * averages, float3 * directions);
void compute_averages_and_directions_2_components(const partition_info * pt,
const imageblock * blk,
const error_weight_block * ewb, const float2 * color_scalefactors, int component1, int component2, float2 * averages, float2 * directions);
// functions to compute error value across a tile given a partitioning
// (with the assumption that each partitioning has colors lying on a line where
// they are represented with infinite precision). Also return the length of the line
// segments that the partition's colors are actually projected onto.
float compute_error_squared_gba(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
// output: computed length of the partitioning's line. This is not part of the
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
float *length_of_lines);
float compute_error_squared_rba(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
// output: computed length of the partitioning's line. This is not part of the
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
float *length_of_lines);
float compute_error_squared_rga(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
// output: computed length of the partitioning's line. This is not part of the
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
float *length_of_lines);
float compute_error_squared_rgb(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
// output: computed length of the partitioning's line. This is not part of the
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
float *length_of_lines);
float compute_error_squared_rgba(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line4 * lines, // one line for each of the partitions. The lines are assumed to be normalized.
float *length_of_lines);
float compute_error_squared_rg(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
float compute_error_squared_rb(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
float compute_error_squared_gb(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
float compute_error_squared_ra(const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
// functions to compute error value across a tile for a particular line function
// for a single partition.
float compute_error_squared_rgb_single_partition(int partition_to_test, int xdim, int ydim, int zdim, const partition_info * pt, // the partition that we use when computing the squared-error.
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * lin // the line for the partition.
);
// for each partition, compute its color weightings.
void compute_partition_error_color_weightings(int xdim, int ydim, int zdim, const error_weight_block * ewb, const partition_info * pi, float4 error_weightings[4], float4 color_scalefactors[4]);
// function to find the best partitioning for a given block.
void find_best_partitionings(int partition_search_limit, int xdim, int ydim, int zdim, int partition_count, const imageblock * pb, const error_weight_block * ewb, int candidates_to_return,
// best partitionings to use if the endpoint colors are assumed to be uncorrelated
int *best_partitions_uncorrellated,
// best partitionings to use if the endpoint colors have the same chroma
int *best_partitions_samechroma,
// best partitionings to use if dual plane of weights are present
int *best_partitions_dual_weight_planes);
// use k-means clustering to compute a partition ordering for a block.
void kmeans_compute_partition_ordering(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, int *ordering);
// *********************************************************
// functions and data pertaining to images and imageblocks
// *********************************************************
// In-memory image handled by the codec. Instances are obtained from
// allocate_image() or the image loaders and released with destroy_image().
struct astc_codec_image
{
// pixel storage for 8-bit and 16-bit data.
// NOTE(review): presumably only one of the two is non-NULL, chosen by the
// `bitness` passed to allocate_image(), and indexed [z][y][x] -- confirm.
uint8_t ***imagedata8;
uint16_t ***imagedata16;
// image dimensions.
int xsize;
int ysize;
int zsize;
// NOTE(review): presumably the width of the border filled by
// fill_image_padding_area() -- confirm.
int padding;
};
void destroy_image(astc_codec_image * img);
astc_codec_image *allocate_image(int bitness, int xsize, int ysize, int zsize, int padding);
void initialize_image(astc_codec_image * img);
void fill_image_padding_area(astc_codec_image * img);
extern float4 ***input_averages;
extern float4 ***input_variances;
extern float ***input_alpha_averages;
// Component swizzle: for each of the four components r, g, b and a, a
// selector that picks where its value comes from.
// the entries here : 0=red, 1=green, 2=blue, 3=alpha, 4=0.0, 5=1.0
struct swizzlepattern
{
uint8_t r;
uint8_t g;
uint8_t b;
uint8_t a;
};
int determine_image_channels(const astc_codec_image * img);
// function to compute regional averages and variances for an image
void compute_averages_and_variances(const astc_codec_image * img, float rgb_power_to_use, float alpha_power_to_use, int avg_kernel_radius, int var_kernel_radius, swizzlepattern swz);
/*
Functions to load image from file.
If successful, return an astc_codec_image object.
If unsuccessful, returns NULL.
*result is used to return a result. In case of a successfully loaded image, bits[2:0]
of *result indicate how many components are present, and bit[7] indicate whether
the input image was LDR or HDR (0=LDR, 1=HDR).
In case of failure, *result is given a negative value.
*/
astc_codec_image *load_ktx_uncompressed_image(const char *filename, int padding, int *result);
astc_codec_image *load_dds_uncompressed_image(const char *filename, int padding, int *result);
astc_codec_image *load_tga_image(const char *tga_filename, int padding, int *result);
astc_codec_image *load_image_with_stb(const char *filename, int padding, int *result);
astc_codec_image *astc_codec_load_image(const char *filename, int padding, int *result);
int astc_codec_unlink(const char *filename);
// function to store image to file
// If successful, returns the number of channels in input image
// If unsuccessful, returns a negative number.
int store_ktx_uncompressed_image(const astc_codec_image * img, const char *filename, int bitness);
int store_dds_uncompressed_image(const astc_codec_image * img, const char *filename, int bitness);
int store_tga_image(const astc_codec_image * img, const char *tga_filename, int bitness);
int astc_codec_store_image(const astc_codec_image * img, const char *filename, int bitness, const char **format_string);
int get_output_filename_enforced_bitness(const char *filename);
// compute a bunch of error metrics
void compute_error_metrics(int input_image_is_hdr, int input_components, const astc_codec_image * img1, const astc_codec_image * img2, int low_fstop, int high_fstop, int psnrmode);
// fetch an image-block from the input file
void fetch_imageblock(const astc_codec_image * img, imageblock * pb, // picture-block to initialize with image data
// block dimensions
int xdim, int ydim, int zdim,
// position in picture to fetch block from
int xpos, int ypos, int zpos, swizzlepattern swz);
// write an image block to the output file buffer.
// the data written are taken from orig_data.
void write_imageblock(astc_codec_image * img, const imageblock * pb, // picture-block to initialize with image data
// block dimensions
int xdim, int ydim, int zdim,
// position in picture to write block to.
int xpos, int ypos, int zpos, swizzlepattern swz);
// helper function to check whether a given picture-block has alpha that is not
// just uniformly 1.
int imageblock_uses_alpha(int xdim, int ydim, int zdim, const imageblock * pb);
float compute_imageblock_difference(int xdim, int ydim, int zdim, const imageblock * p1, const imageblock * p2, const error_weight_block * ewb);
// ***********************************************************
// functions pertaining to computing texel weights for a block
// ***********************************************************
// Color endpoints of a block: a pair of float4 values (endpt0, endpt1) for each
// partition. The arrays have room for 4 partitions; partition_count gives the
// number of partitions.
struct endpoints
{
int partition_count;
float4 endpt0[4];
float4 endpt1[4];
};
// Endpoints together with one weight and one weight error scale per texel.
// This is the output type of compute_endpoints_and_ideal_weights_1_plane() and
// compute_endpoints_and_ideal_weights_2_planes().
struct endpoints_and_weights
{
endpoints ep;
float weights[MAX_TEXELS_PER_BLOCK];
float weight_error_scale[MAX_TEXELS_PER_BLOCK];
};
// Compute endpoints and per-texel weights for a block; results are returned
// through the endpoints_and_weights pointer(s).
void compute_endpoints_and_ideal_weights_1_plane(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, endpoints_and_weights * ei);
void compute_endpoints_and_ideal_weights_2_planes(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, int separate_component,
endpoints_and_weights * ei1, // for the three components of the primary plane of weights
endpoints_and_weights * ei2 // for the remaining component.
);
// Weight computations against a decimation table.
// NOTE(review): which of the float* arguments are inputs and which are outputs
// is only partly indicated by const-ness and naming; check the implementations.
void compute_ideal_weights_for_decimation_table(const endpoints_and_weights * eai, const decimation_table * it, float *weight_set, float *weights);
void compute_ideal_quantized_weights_for_decimation_table(const endpoints_and_weights * eai,
const decimation_table * it,
float low_bound, float high_bound, const float *weight_set_in, float *weight_set_out, uint8_t * quantized_weight_set, int quantization_level);
float compute_error_of_weight_set(const endpoints_and_weights * eai, const decimation_table * it, const float *weights);
// float and int variants of the same per-texel lookup
float compute_value_of_texel_flt(int texel_to_get, const decimation_table * it, const float *weights);
int compute_value_of_texel_int(int texel_to_get, const decimation_table * it, const int *weights);
void merge_endpoints(const endpoints * ep1, // contains three of the color components
const endpoints * ep2, // contains the remaining color component
int separate_component, endpoints * res);
// functions dealing with color endpoints
// function to pack a pair of color endpoints into a series of integers.
// the format used may or may not match the format specified;
// the return value is the format actually used.
int pack_color_endpoints(astc_decode_mode decode_mode, float4 color0, float4 color1, float4 rgbs_color, float4 rgbo_color, float2 luminances, int format, int *output, int quantization_level);
// unpack a pair of color endpoints from a series of integers.
// 'format' selects the endpoint format (one of the FMT_* values).
// *rgb_hdr and *alpha_hdr are outputs telling whether the RGB / alpha channels
// of the returned endpoints hold HDR data (1) or LDR data (0); both are forced
// to 0 in the LDR decode modes.
// *nan_endpoint is set to 1 when an HDR endpoint is met in DECODE_LDR mode, 0 otherwise.
void unpack_color_endpoints(astc_decode_mode decode_mode, int format, int quantization_level, const int *input, int *rgb_hdr, int *alpha_hdr, int *nan_endpoint, ushort4 * output0, ushort4 * output1);
// Per-partition error estimates for the cheaper endpoint encodings.
// Passed as the 'eci' argument of compute_encoding_choice_errors().
struct encoding_choice_errors
{
float rgb_scale_error; // error of using LDR RGB-scale instead of complete endpoints.
float rgb_luma_error; // error of using HDR RGB-scale instead of complete endpoints.
float luminance_error; // error of using luminance instead of RGB
float alpha_drop_error; // error of discarding alpha
float rgb_drop_error; // error of discarding RGB
int can_offset_encode; // presumably nonzero when offset (delta) encoding is usable - confirm
int can_blue_contract; // presumably nonzero when blue-contraction is usable - confirm
};
// buffers used to store intermediate data in compress_symbolic_block_fixed_partition_*()
// All members are plain pointers; this struct does not allocate or free them.
// NOTE(review): required element counts and who owns the memory are not visible
// from this header - check the code that sets these up.
struct compress_fixed_partition_buffers
{
endpoints_and_weights* ei1;
endpoints_and_weights* ei2;
endpoints_and_weights* eix1;
endpoints_and_weights* eix2;
float *decimated_quantized_weights;
float *decimated_weights;
float *flt_quantized_decimated_quantized_weights;
uint8_t *u8_quantized_decimated_quantized_weights;
};
// Scratch buffers handed to compress_symbolic_block() through its 'tmpbuf'
// argument. Holds one compress_fixed_partition_buffers set named plane1 and
// one named planes2 (presumably for the 1-plane and 2-plane searches - confirm).
// As with the struct above, only raw pointers are stored here.
struct compress_symbolic_block_buffers
{
error_weight_block *ewb;
error_weight_block_orig *ewbo;
symbolic_compressed_block *tempblocks;
imageblock *temp;
compress_fixed_partition_buffers *plane1;
compress_fixed_partition_buffers *planes2;
};
// Fills 'eci' with the error estimates described by encoding_choice_errors.
void compute_encoding_choice_errors(int xdim, int ydim, int zdim, const imageblock * pb, const partition_info * pi, const error_weight_block * ewb,
int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode
encoding_choice_errors * eci);
// Results are returned through the four trailing array arguments, each of
// which has 4 entries (4x4 for the format specifiers).
void determine_optimal_set_of_endpoint_formats_to_use(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, const endpoints * ep,
int separate_component, // separate color component for 2-plane mode; -1 for single-plane mode
// bitcounts and errors computed for the various quantization methods
const int *qwt_bitcounts, const float *qwt_errors,
// output data
int partition_format_specifiers[4][4], int quantized_weight[4], int quantization_level[4], int quantization_level_mod[4]);
// Recomputes the endpoints in 'ep' for the given (already chosen) weights and
// also returns the alternative endpoint representations listed below.
void recompute_ideal_colors(int xdim, int ydim, int zdim, int weight_quantization_mode, endpoints * ep, // contains the endpoints we wish to update
float4 * rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6
float4 * rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7
float2 * lum_vectors, // used to return luminance-vectors.
const uint8_t * weight_set, // the current set of weight values
const uint8_t * plane2_weight_set, // NULL if plane 2 is not actually used.
int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present
const partition_info * pi, const decimation_table * it, const imageblock * pb, // picture-block containing the actual data.
const error_weight_block * ewb);
// Takes a non-const error_weighting_params, so it presumably updates 'ewp' in
// place for the given block dimensions - confirm against the implementation.
void expand_block_artifact_suppression(int xdim, int ydim, int zdim, error_weighting_params * ewp);
// Function to set error weights for each color component for each texel in a block.
// Returns the sum of all the error values set.
float prepare_error_weight_block(const astc_codec_image * input_image,
// dimensions of error weight block.
int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, error_weight_block * ewb, error_weight_block_orig * ewbo);
// functions pertaining to weight alignment
// NOTE(review): prepare_angular_tables() presumably has to run once before the
// compute_angular_endpoints_*() functions are used - confirm at the call sites.
void prepare_angular_tables(void);
// The low_value/high_value arrays are outputs with one entry per weight mode
// (MAX_WEIGHT_MODES entries each).
void compute_angular_endpoints_1plane(float mode_cutoff,
const block_size_descriptor * bsd,
const float *decimated_quantized_weights, const float *decimated_weights, float low_value[MAX_WEIGHT_MODES], float high_value[MAX_WEIGHT_MODES]);
// Same as the 1-plane variant, with a separate low/high output pair per plane.
void compute_angular_endpoints_2planes(float mode_cutoff,
const block_size_descriptor * bsd,
const float *decimated_quantized_weights,
const float *decimated_weights,
float low_value1[MAX_WEIGHT_MODES], float high_value1[MAX_WEIGHT_MODES], float low_value2[MAX_WEIGHT_MODES], float high_value2[MAX_WEIGHT_MODES]);
/* *********************************** high-level encode and decode functions ************************************ */
// Compresses the image block 'blk' into the symbolic form 'scb', using 'tmpbuf'
// for scratch storage.
// NOTE(review): the meaning of the float return value is not visible here.
float compress_symbolic_block(const astc_codec_image * input_image,
astc_decode_mode decode_mode, int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, symbolic_compressed_block * scb,
compress_symbolic_block_buffers * tmpbuf);
// Interpolate between two endpoint colors, float version (weights in 0..1).
float4 lerp_color_flt(const float4 color0, const float4 color1, float weight, // 0..1
float plane2_weight, // 0..1
int plane2_color_component // 0..3; -1 if only one plane of weights is present.
);
// Interpolate between two endpoint colors, integer version (weights in 0..64).
ushort4 lerp_color_int(astc_decode_mode decode_mode, ushort4 color0, ushort4 color1, int weight, // 0..64
int plane2_weight, // 0..64
int plane2_color_component // 0..3; -1 if only one plane of weights is present.
);
// Expands the symbolic block 'scb' into the image block 'blk'.
void decompress_symbolic_block(astc_decode_mode decode_mode,
// dimensions of block
int xdim, int ydim, int zdim,
// position of block
int xpos, int ypos, int zpos, const symbolic_compressed_block * scb, imageblock * blk);
// Conversions between the symbolic block form and the physical (bit-packed) block.
physical_compressed_block symbolic_to_physical(int xdim, int ydim, int zdim, const symbolic_compressed_block * sc);
void physical_to_symbolic(int xdim, int ydim, int zdim, physical_compressed_block pb, symbolic_compressed_block * res);
// 16-bit value conversions; judging by the names, the targets are FP16 ("sf16") bit patterns.
uint16_t unorm16_to_sf16(uint16_t p);
uint16_t lns_to_sf16(uint16_t p);
#endif

2096
3rdparty/astc/astc_color_quantize.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

970
3rdparty/astc/astc_color_unquantize.cpp vendored Normal file
View File

@@ -0,0 +1,970 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Color unquantization functions for ASTC.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
#include "mathlib.h"
#include "softfloat.h"
// Decode an RGB base+delta endpoint pair.
// input[0], input[2], input[4] hold the base R, G, B values and input[1],
// input[3], input[5] the corresponding deltas. Writes the two endpoints (alpha
// fixed at 0xFF) and returns 1 when the blue-contracted, swapped ordering was
// used, 0 otherwise.
int rgb_delta_unpack(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	int base[3];				// R, G, B of the first color
	int other[3];				// R, G, B of the second color (base + delta)
	int delta_sum = 0;
	int ch;

	for (ch = 0; ch < 3; ch++)
	{
		// unquantize the stored base and delta values
		int lo = color_unquantization_tables[quantization_level][input[2 * ch]];
		int hi = color_unquantization_tables[quantization_level][input[2 * ch + 1]];

		// bit-transfer: the top bit of the delta becomes bit 8 of the base,
		// the remaining 7 bits form a signed delta
		lo |= (hi & 0x80) << 1;
		hi &= 0x7F;
		if (hi & 0x40)
			hi -= 0x80;

		lo >>= 1;
		hi >>= 1;

		delta_sum += hi;
		base[ch] = lo;
		other[ch] = hi + lo;
	}

	int first[3];
	int second[3];
	int swapped;

	if (delta_sum >= 0)
	{
		for (ch = 0; ch < 3; ch++)
		{
			first[ch] = base[ch];
			second[ch] = other[ch];
		}
		swapped = 0;
	}
	else
	{
		// negative delta sum: endpoints trade places and blue-contraction is applied
		first[0] = (other[0] + other[2]) >> 1;
		first[1] = (other[1] + other[2]) >> 1;
		first[2] = other[2];
		second[0] = (base[0] + base[2]) >> 1;
		second[1] = (base[1] + base[2]) >> 1;
		second[2] = base[2];
		swapped = 1;
	}

	// clamp every component to [0,255]
	for (ch = 0; ch < 3; ch++)
	{
		if (first[ch] < 0)
			first[ch] = 0;
		else if (first[ch] > 255)
			first[ch] = 255;

		if (second[ch] < 0)
			second[ch] = 0;
		else if (second[ch] > 255)
			second[ch] = 255;
	}

	output0->x = first[0];
	output0->y = first[1];
	output0->z = first[2];
	output0->w = 0xFF;

	output1->x = second[0];
	output1->y = second[1];
	output1->z = second[2];
	output1->w = 0xFF;

	return swapped;
}
// Decode a direct RGB endpoint pair (alpha fixed at 255).
// Even input indices belong to the first stored color, odd indices to the
// second. When the first color's component sum exceeds the second's, the
// colors are blue-contracted and swapped, and 1 is returned; otherwise the
// stored order is kept and 0 is returned.
int rgb_unpack(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	int red_a = color_unquantization_tables[quantization_level][input[0]];
	int red_b = color_unquantization_tables[quantization_level][input[1]];
	int green_a = color_unquantization_tables[quantization_level][input[2]];
	int green_b = color_unquantization_tables[quantization_level][input[3]];
	int blue_a = color_unquantization_tables[quantization_level][input[4]];
	int blue_b = color_unquantization_tables[quantization_level][input[5]];

	const int sum_a = red_a + green_a + blue_a;
	const int sum_b = red_b + green_b + blue_b;

	if (!(sum_a > sum_b))
	{
		// stored order is the decode order
		output0->x = red_a;
		output0->y = green_a;
		output0->z = blue_a;
		output0->w = 255;

		output1->x = red_b;
		output1->y = green_b;
		output1->z = blue_b;
		output1->w = 255;
		return 0;
	}

	// blue-contraction: red and green are averaged with blue, blue is kept
	red_a = (red_a + blue_a) >> 1;
	green_a = (green_a + blue_a) >> 1;
	red_b = (red_b + blue_b) >> 1;
	green_b = (green_b + blue_b) >> 1;

	// the two colors trade places
	output0->x = red_b;
	output0->y = green_b;
	output0->z = blue_b;
	output0->w = 255;

	output1->x = red_a;
	output1->y = green_a;
	output1->z = blue_a;
	output1->w = 255;
	return 1;
}
// Decode a direct RGBA endpoint pair: RGB comes from rgb_unpack(), alpha from
// input[6] and input[7]. If rgb_unpack() swapped the colors, the two alpha
// values are swapped as well.
void rgba_unpack(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	const int swapped = rgb_unpack(input, quantization_level, output0, output1);

	// index of the alpha value that goes with each output endpoint
	const int alpha_idx0 = (swapped == 0) ? 6 : 7;
	const int alpha_idx1 = (swapped == 0) ? 7 : 6;

	output0->w = color_unquantization_tables[quantization_level][input[alpha_idx0]];
	output1->w = color_unquantization_tables[quantization_level][input[alpha_idx1]];
}
// Decode an RGBA base+delta endpoint pair: RGB comes from rgb_delta_unpack(),
// alpha is a base (input[6]) plus signed delta (input[7]) decoded the same way.
// If rgb_delta_unpack() swapped the colors, the alpha values are swapped too.
void rgba_delta_unpack(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	int alpha_base = color_unquantization_tables[quantization_level][input[6]];
	int alpha_other = color_unquantization_tables[quantization_level][input[7]];

	// bit-transfer, then sign-extend the 7-bit delta
	alpha_base |= (alpha_other & 0x80) << 1;
	alpha_other &= 0x7F;
	if (alpha_other & 0x40)
		alpha_other -= 0x80;

	alpha_base >>= 1;
	alpha_other >>= 1;
	alpha_other += alpha_base;

	// only the summed value is clamped
	if (alpha_other < 0)
		alpha_other = 0;
	else if (alpha_other > 255)
		alpha_other = 255;

	const int swapped = rgb_delta_unpack(input, quantization_level, output0, output1);

	output0->w = (swapped == 0) ? alpha_base : alpha_other;
	output1->w = (swapped == 0) ? alpha_other : alpha_base;
}
// Decode an RGB+scale endpoint pair: output1 is the stored color, output0 is
// that color multiplied by scale/256. Alpha is 255 on both.
void rgb_scale_unpack(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	const int red = color_unquantization_tables[quantization_level][input[0]];
	const int green = color_unquantization_tables[quantization_level][input[1]];
	const int blue = color_unquantization_tables[quantization_level][input[2]];
	const int scale = color_unquantization_tables[quantization_level][input[3]];

	const int red_scaled = (red * scale) >> 8;
	const int green_scaled = (green * scale) >> 8;
	const int blue_scaled = (blue * scale) >> 8;

	*output1 = ushort4(red, green, blue, 255);
	*output0 = ushort4(red_scaled, green_scaled, blue_scaled, 255);
}
// Decode an RGB+scale endpoint pair with explicit alpha: RGB as in
// rgb_scale_unpack(), alpha taken from input[4] (output0) and input[5] (output1).
void rgb_scale_alpha_unpack(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	rgb_scale_unpack(input, quantization_level, output0, output1);

	const int alpha_first = color_unquantization_tables[quantization_level][input[4]];
	const int alpha_second = color_unquantization_tables[quantization_level][input[5]];

	output0->w = alpha_first;
	output1->w = alpha_second;
}
void luminance_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
{
int lum0 = color_unquantization_tables[quantization_level][input[0]];
int lum1 = color_unquantization_tables[quantization_level][input[1]];
*output0 = ushort4(lum0, lum0, lum0, 255);
*output1 = ushort4(lum1, lum1, lum1, 255);
}
// Decode a luminance base+delta endpoint pair. The base takes its low 6 bits
// from input[0] and its top 2 bits from input[1]; the low 6 bits of input[1]
// are an unsigned delta. The second luminance is clamped to 255.
void luminance_delta_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	const int packed0 = color_unquantization_tables[quantization_level][input[0]];
	const int packed1 = color_unquantization_tables[quantization_level][input[1]];

	const int base = (packed0 >> 2) | (packed1 & 0xC0);
	const int raised = base + (packed1 & 0x3F);
	const int top = (raised > 255) ? 255 : raised;

	*output0 = ushort4(base, base, base, 255);
	*output1 = ushort4(top, top, top, 255);
}
void luminance_alpha_unpack(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
{
int lum0 = color_unquantization_tables[quantization_level][input[0]];
int lum1 = color_unquantization_tables[quantization_level][input[1]];
int alpha0 = color_unquantization_tables[quantization_level][input[2]];
int alpha1 = color_unquantization_tables[quantization_level][input[3]];
*output0 = ushort4(lum0, lum0, lum0, alpha0);
*output1 = ushort4(lum1, lum1, lum1, alpha1);
}
// Decode a luminance+alpha base+delta endpoint pair. Channel 0 (luminance)
// uses input[0]/input[1], channel 1 (alpha) uses input[2]/input[3]; each is a
// base plus signed delta. The summed value is clamped to [0,255].
void luminance_alpha_delta_unpack(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	int base[2];				// [0] = luminance, [1] = alpha
	int top[2];
	int ch;

	for (ch = 0; ch < 2; ch++)
	{
		int lo = color_unquantization_tables[quantization_level][input[2 * ch]];
		int hi = color_unquantization_tables[quantization_level][input[2 * ch + 1]];

		// bit-transfer, then sign-extend the 7-bit delta
		lo |= (hi & 0x80) << 1;
		hi &= 0x7F;
		if (hi & 0x40)
			hi -= 0x80;

		lo >>= 1;
		hi >>= 1;
		hi += lo;

		if (hi < 0)
			hi = 0;
		else if (hi > 255)
			hi = 255;

		base[ch] = lo;
		top[ch] = hi;
	}

	*output0 = ushort4(base[0], base[0], base[0], base[1]);
	*output1 = ushort4(top[0], top[0], top[0], top[1]);
}
// RGB-offset format
// Decodes four stored values into a pair of HDR RGB endpoints. output1 is the
// decoded color, output0 is that color minus a common 'scale' offset on every
// component. Components are built as 12-bit values and stored shifted left by
// 4; alpha is set to 0x7800 on both endpoints.
void hdr_rgbo_unpack3(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
{
int v0 = color_unquantization_tables[quantization_level][input[0]];
int v1 = color_unquantization_tables[quantization_level][input[1]];
int v2 = color_unquantization_tables[quantization_level][input[2]];
int v3 = color_unquantization_tables[quantization_level][input[3]];
// 4-bit mode selector: top two bits of v0, plus the top bit of v1 and of v2.
int modeval = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3);
// majcomp: which component gets swapped into the 'red' slot below (0 = none,
// 1 = green, 2 = blue). mode: one of six bit layouts, 0..5.
int majcomp;
int mode;
if ((modeval & 0xC) != 0xC)
{
majcomp = modeval >> 2;
mode = modeval & 3;
}
else if (modeval != 0xF)
{
majcomp = modeval & 3;
mode = 4;
}
else
{
majcomp = 0;
mode = 5;
}
// fixed-placement low bits of each field
int red = v0 & 0x3F;
int green = v1 & 0x1F;
int blue = v2 & 0x1F;
int scale = v3 & 0x1F;
// seven bits whose destination depends on the mode
int bit0 = (v1 >> 6) & 1;
int bit1 = (v1 >> 5) & 1;
int bit2 = (v2 >> 6) & 1;
int bit3 = (v2 >> 5) & 1;
int bit4 = (v3 >> 7) & 1;
int bit5 = (v3 >> 6) & 1;
int bit6 = (v3 >> 5) & 1;
// one-hot encoding of the mode; each mask below lists the modes in which
// the given bit is routed to the given position.
int ohcomp = 1 << mode;
if (ohcomp & 0x30)
green |= bit0 << 6;
if (ohcomp & 0x3A)
green |= bit1 << 5;
if (ohcomp & 0x30)
blue |= bit2 << 6;
if (ohcomp & 0x3A)
blue |= bit3 << 5;
if (ohcomp & 0x3D)
scale |= bit6 << 5;
if (ohcomp & 0x2D)
scale |= bit5 << 6;
if (ohcomp & 0x04)
scale |= bit4 << 7;
if (ohcomp & 0x3B)
red |= bit4 << 6;
if (ohcomp & 0x04)
red |= bit3 << 6;
if (ohcomp & 0x10)
red |= bit5 << 7;
if (ohcomp & 0x0F)
red |= bit2 << 7;
if (ohcomp & 0x05)
red |= bit1 << 8;
if (ohcomp & 0x0A)
red |= bit0 << 8;
if (ohcomp & 0x05)
red |= bit0 << 9;
if (ohcomp & 0x02)
red |= bit6 << 9;
if (ohcomp & 0x01)
red |= bit3 << 10;
if (ohcomp & 0x02)
red |= bit5 << 10;
// expand to 12 bits.
static const int shamts[6] = { 1, 1, 2, 3, 4, 5 };
int shamt = shamts[mode];
red <<= shamt;
green <<= shamt;
blue <<= shamt;
scale <<= shamt;
// on modes 0 to 4, the values stored for "green" and "blue" are differentials,
// not absolute values.
if (mode != 5)
{
green = red - green;
blue = red - blue;
}
// switch around components.
int temp;
switch (majcomp)
{
case 1:
temp = red;
red = green;
green = temp;
break;
case 2:
temp = red;
red = blue;
blue = temp;
break;
default:
break;
}
// first endpoint = second endpoint minus the common offset
int red0 = red - scale;
int green0 = green - scale;
int blue0 = blue - scale;
// clamp to [0,0xFFF].
// NOTE(review): only the lower bound is actually enforced below; no upper clamp is applied.
if (red < 0)
red = 0;
if (green < 0)
green = 0;
if (blue < 0)
blue = 0;
if (red0 < 0)
red0 = 0;
if (green0 < 0)
green0 = 0;
if (blue0 < 0)
blue0 = 0;
*output0 = ushort4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
*output1 = ushort4(red << 4, green << 4, blue << 4, 0x7800);
}
// Decodes six stored values into a pair of HDR RGB endpoints.
// Components are built as 12-bit values and stored shifted left by 4; alpha is
// set to 0x7800 on both endpoints. When the major-component selector is 3 the
// six values are used as direct components instead (see the early return).
void hdr_rgb_unpack3(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
{
int v0 = color_unquantization_tables[quantization_level][input[0]];
int v1 = color_unquantization_tables[quantization_level][input[1]];
int v2 = color_unquantization_tables[quantization_level][input[2]];
int v3 = color_unquantization_tables[quantization_level][input[3]];
int v4 = color_unquantization_tables[quantization_level][input[4]];
int v5 = color_unquantization_tables[quantization_level][input[5]];
// extract all the fixed-placement bitfields
// modeval (0..7) comes from the top bits of v1, v2, v3; majcomp (0..3) from the top bits of v4, v5.
int modeval = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2);
int majcomp = ((v4 & 0x80) >> 7) | (((v5 & 0x80) >> 7) << 1);
if (majcomp == 3)
{
// direct mode: the values map straight onto the components, no differentials
*output0 = ushort4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800);
*output1 = ushort4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800);
return;
}
// a: base value; b0/b1, c, d0/d1: offsets subtracted from it further down
int a = v0 | ((v1 & 0x40) << 2);
int b0 = v2 & 0x3f;
int b1 = v3 & 0x3f;
int c = v1 & 0x3f;
int d0 = v4 & 0x7f;
int d1 = v5 & 0x7f;
// get hold of the number of bits in 'd0' and 'd1'
static const int dbits_tab[8] = { 7, 6, 7, 6, 5, 6, 5, 6 };
int dbits = dbits_tab[modeval];
// extract six variable-placement bits
int bit0 = (v2 >> 6) & 1;
int bit1 = (v3 >> 6) & 1;
int bit2 = (v4 >> 6) & 1;
int bit3 = (v5 >> 6) & 1;
int bit4 = (v4 >> 5) & 1;
int bit5 = (v5 >> 5) & 1;
// and prepend the variable-placement bits depending on mode.
// each mask below lists, one bit per mode, the modes in which that routing applies.
int ohmod = 1 << modeval; // one-hot-mode
if (ohmod & 0xA4)
a |= bit0 << 9;
if (ohmod & 0x8)
a |= bit2 << 9;
if (ohmod & 0x50)
a |= bit4 << 9;
if (ohmod & 0x50)
a |= bit5 << 10;
if (ohmod & 0xA0)
a |= bit1 << 10;
if (ohmod & 0xC0)
a |= bit2 << 11;
if (ohmod & 0x4)
c |= bit1 << 6;
if (ohmod & 0xE8)
c |= bit3 << 6;
if (ohmod & 0x20)
c |= bit2 << 7;
if (ohmod & 0x5B)
b0 |= bit0 << 6;
if (ohmod & 0x5B)
b1 |= bit1 << 6;
if (ohmod & 0x12)
b0 |= bit2 << 7;
if (ohmod & 0x12)
b1 |= bit3 << 7;
if (ohmod & 0xAF)
d0 |= bit4 << 5;
if (ohmod & 0xAF)
d1 |= bit5 << 5;
if (ohmod & 0x5)
d0 |= bit2 << 6;
if (ohmod & 0x5)
d1 |= bit3 << 6;
// sign-extend 'd0' and 'd1'
// note: this code assumes that signed right-shift actually sign-fills, not zero-fills.
// The left shift also discards any bits of d0/d1 above 'dbits', so no separate
// masking step is needed.
// NOTE(review): left-shifting a signed value into or past the sign bit is not
// portable before C++20 either; this relies on two's-complement wrapping.
int32_t d0x = d0;
int32_t d1x = d1;
int sx_shamt = 32 - dbits;
d0x <<= sx_shamt;
d0x >>= sx_shamt;
d1x <<= sx_shamt;
d1x >>= sx_shamt;
d0 = d0x;
d1 = d1x;
// expand all values to 12 bits, with left-shift as needed.
// shift amount is 3 for modes 0-1, 2 for modes 2-3, 1 for modes 4-5, 0 for modes 6-7.
int val_shamt = (modeval >> 1) ^ 3;
a <<= val_shamt;
b0 <<= val_shamt;
b1 <<= val_shamt;
c <<= val_shamt;
d0 <<= val_shamt;
d1 <<= val_shamt;
// then compute the actual color values.
int red1 = a;
int green1 = a - b0;
int blue1 = a - b1;
int red0 = a - c;
int green0 = a - b0 - c - d0;
int blue0 = a - b1 - c - d1;
// clamp the color components to [0,2^12 - 1]
if (red0 < 0)
red0 = 0;
else if (red0 > 0xFFF)
red0 = 0xFFF;
if (green0 < 0)
green0 = 0;
else if (green0 > 0xFFF)
green0 = 0xFFF;
if (blue0 < 0)
blue0 = 0;
else if (blue0 > 0xFFF)
blue0 = 0xFFF;
if (red1 < 0)
red1 = 0;
else if (red1 > 0xFFF)
red1 = 0xFFF;
if (green1 < 0)
green1 = 0;
else if (green1 > 0xFFF)
green1 = 0xFFF;
if (blue1 < 0)
blue1 = 0;
else if (blue1 > 0xFFF)
blue1 = 0xFFF;
// switch around the color components
// (majcomp == 3 never reaches this point because of the early return above)
int temp0, temp1;
switch (majcomp)
{
case 1: // switch around red and green
temp0 = red0;
temp1 = red1;
red0 = green0;
red1 = green1;
green0 = temp0;
green1 = temp1;
break;
case 2: // switch around red and blue
temp0 = red0;
temp1 = red1;
red0 = blue0;
red1 = blue1;
blue0 = temp0;
blue1 = temp1;
break;
case 0: // no switch
break;
}
*output0 = ushort4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
*output1 = ushort4(red1 << 4, green1 << 4, blue1 << 4, 0x7800);
}
// Decode HDR RGB endpoints with LDR alpha: RGB via hdr_rgb_unpack3(), alpha
// is the plain unquantized value of input[6] (output0) and input[7] (output1).
void hdr_rgb_ldr_alpha_unpack3(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	hdr_rgb_unpack3(input, quantization_level, output0, output1);

	const int alpha_first = color_unquantization_tables[quantization_level][input[6]];
	const int alpha_second = color_unquantization_tables[quantization_level][input[7]];

	output0->w = alpha_first;
	output1->w = alpha_second;
}
// Decode an HDR luminance endpoint pair stored as base + unsigned delta.
// The top bit of the first value selects between two bit layouts. Values are
// 12-bit, the second is clamped to 0xFFF, and both are stored shifted left by
// 4 in R, G and B; alpha is 0x7800.
void hdr_luminance_small_range_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	const int packed0 = color_unquantization_tables[quantization_level][input[0]];
	const int packed1 = color_unquantization_tables[quantization_level][input[1]];

	const int wide_delta = (packed0 & 0x80) != 0;

	// layout A (top bit set): 3 high bits + 7 low bits << 2, 5-bit delta << 2
	// layout B (top bit clear): 4 high bits + 7 low bits << 1, 4-bit delta << 1
	const int low = wide_delta
		? (((packed1 & 0xE0) << 4) | ((packed0 & 0x7F) << 2))
		: (((packed1 & 0xF0) << 4) | ((packed0 & 0x7F) << 1));
	const int delta = wide_delta
		? ((packed1 & 0x1F) << 2)
		: ((packed1 & 0xF) << 1);

	int high = low + delta;
	if (high > 0xFFF)
		high = 0xFFF;

	const int low_out = low << 4;
	const int high_out = high << 4;

	*output0 = ushort4(low_out, low_out, low_out, 0x7800);
	*output1 = ushort4(high_out, high_out, high_out, 0x7800);
}
// Decode an HDR luminance endpoint pair stored as two direct 8-bit values.
// If the second value is not smaller than the first, they are used in order;
// otherwise they are swapped and nudged by +8 / -8 after scaling. Results are
// stored shifted left by a further 4 in R, G and B; alpha is 0x7800.
void hdr_luminance_large_range_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	const int first = color_unquantization_tables[quantization_level][input[0]];
	const int second = color_unquantization_tables[quantization_level][input[1]];

	const int in_order = (second >= first);

	const int low = in_order ? (first << 4) : ((second << 4) + 8);
	const int high = in_order ? (second << 4) : ((first << 4) - 8);

	const int low_out = low << 4;
	const int high_out = high << 4;

	*output0 = ushort4(low_out, low_out, low_out, 0x7800);
	*output1 = ushort4(high_out, high_out, high_out, 0x7800);
}
// Decodes a pair of HDR alpha values from two stored values and returns them
// through a0 and a1. Values are built as 12 bits and then shifted left by 4.
void hdr_alpha_unpack(const int input[2], int quantization_level, int *a0, int *a1)
{
int v6 = color_unquantization_tables[quantization_level][input[0]];
int v7 = color_unquantization_tables[quantization_level][input[1]];
// 2-bit selector built from the top bit of each value
int selector = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
v6 &= 0x7F;
v7 &= 0x7F;
if (selector == 3)
{
// direct mode: two independent 7-bit values
*a0 = v6 << 5;
*a1 = v7 << 5;
}
else
{
// base + delta mode: the upper bits of v7 extend v6 (the base), the
// remaining low (6 - selector) bits of v7 are a signed delta.
v6 |= (v7 << (selector + 1)) & 0x780;
v7 &= (0x3f >> selector);
// sign-extend the delta: xor with the sign bit, then subtract it
v7 ^= 32 >> selector;
v7 -= 32 >> selector;
v6 <<= (4 - selector);
v7 <<= (4 - selector);
v7 += v6;
// only the summed value is clamped, to [0,0xFFF]
if (v7 < 0)
v7 = 0;
else if (v7 > 0xFFF)
v7 = 0xFFF;
*a0 = v6;
*a1 = v7;
}
// final expansion by 4 bits, applied in both modes
*a0 <<= 4;
*a1 <<= 4;
}
// Decode HDR RGB endpoints with HDR alpha: RGB via hdr_rgb_unpack3() on the
// first six values, alpha via hdr_alpha_unpack() on the last two.
void hdr_rgb_hdr_alpha_unpack3(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
{
	int alpha[2];

	hdr_rgb_unpack3(input, quantization_level, output0, output1);
	hdr_alpha_unpack(&input[6], quantization_level, &alpha[0], &alpha[1]);

	output0->w = alpha[0];
	output1->w = alpha[1];
}
// Unpack one pair of color endpoints.
//
// 'format' (a FMT_* value) selects the per-format decoder that fills output0
// and output1 from 'input'. *rgb_hdr / *alpha_hdr report whether the RGB /
// alpha channels hold HDR (1) or LDR (0) data, and *nan_endpoint is set to 1
// when an HDR endpoint is encountered in DECODE_LDR mode (0 otherwise).
// LDR channels are finally widened from 8 to 16 bits by multiplying with 257.
void unpack_color_endpoints(astc_decode_mode decode_mode, int format, int quantization_level, const int *input, int *rgb_hdr, int *alpha_hdr, int *nan_endpoint, ushort4 * output0, ushort4 * output1)
{
	ushort4 *const ends[2] = { output0, output1 };
	int i;

	*nan_endpoint = 0;

	// Step 1: classify the format. An alpha flag of -1 means "the format
	// carries no alpha"; it is resolved in step 3.
	switch (format)
	{
	case FMT_LUMINANCE:
	case FMT_LUMINANCE_DELTA:
	case FMT_LUMINANCE_ALPHA:
	case FMT_LUMINANCE_ALPHA_DELTA:
	case FMT_RGB_SCALE:
	case FMT_RGB_SCALE_ALPHA:
	case FMT_RGB:
	case FMT_RGB_DELTA:
	case FMT_RGBA:
	case FMT_RGBA_DELTA:
		*rgb_hdr = 0;
		*alpha_hdr = 0;
		break;

	case FMT_HDR_LUMINANCE_SMALL_RANGE:
	case FMT_HDR_LUMINANCE_LARGE_RANGE:
	case FMT_HDR_RGB_SCALE:
	case FMT_HDR_RGB:
		*rgb_hdr = 1;
		*alpha_hdr = -1;
		break;

	case FMT_HDR_RGB_LDR_ALPHA:
		*rgb_hdr = 1;
		*alpha_hdr = 0;
		break;

	case FMT_HDR_RGBA:
		*rgb_hdr = 1;
		*alpha_hdr = 1;
		break;

	default:
		// unknown format: flags are left untouched, the error is raised in step 2
		break;
	}

	// Step 2: run the decoder that belongs to the format.
	switch (format)
	{
	case FMT_LUMINANCE:
		luminance_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_LUMINANCE_DELTA:
		luminance_delta_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_HDR_LUMINANCE_SMALL_RANGE:
		hdr_luminance_small_range_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_HDR_LUMINANCE_LARGE_RANGE:
		hdr_luminance_large_range_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_LUMINANCE_ALPHA:
		luminance_alpha_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_LUMINANCE_ALPHA_DELTA:
		luminance_alpha_delta_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_RGB_SCALE:
		rgb_scale_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_RGB_SCALE_ALPHA:
		rgb_scale_alpha_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_HDR_RGB_SCALE:
		hdr_rgbo_unpack3(input, quantization_level, output0, output1);
		break;
	case FMT_RGB:
		rgb_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_RGB_DELTA:
		rgb_delta_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_HDR_RGB:
		hdr_rgb_unpack3(input, quantization_level, output0, output1);
		break;
	case FMT_RGBA:
		rgba_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_RGBA_DELTA:
		rgba_delta_unpack(input, quantization_level, output0, output1);
		break;
	case FMT_HDR_RGB_LDR_ALPHA:
		hdr_rgb_ldr_alpha_unpack3(input, quantization_level, output0, output1);
		break;
	case FMT_HDR_RGBA:
		hdr_rgb_hdr_alpha_unpack3(input, quantization_level, output0, output1);
		break;
	default:
		ASTC_CODEC_INTERNAL_ERROR;
	}

	// Step 3: formats without alpha get a constant alpha, HDR or LDR depending
	// on the global alpha_force_use_of_hdr switch.
	if (*alpha_hdr == -1)
	{
		const int use_hdr_alpha = alpha_force_use_of_hdr ? 1 : 0;
		for (i = 0; i < 2; i++)
			ends[i]->w = use_hdr_alpha ? 0x7800 : 0x00FF;
		*alpha_hdr = use_hdr_alpha;
	}

	// Step 4: adapt the result to the requested decode mode.
	switch (decode_mode)
	{
	case DECODE_LDR_SRGB:
	case DECODE_LDR:
		if (*rgb_hdr == 1)
		{
			// HDR endpoints cannot be represented in an LDR mode
			if (decode_mode == DECODE_LDR_SRGB)
			{
				// fixed error color
				for (i = 0; i < 2; i++)
				{
					ends[i]->x = 0xFF00;
					ends[i]->y = 0x0000;
					ends[i]->z = 0xFF00;
					ends[i]->w = 0xFF00;
				}
			}
			else
			{
				// all-ones endpoints, flagged through *nan_endpoint
				for (i = 0; i < 2; i++)
				{
					ends[i]->x = 0xFFFF;
					ends[i]->y = 0xFFFF;
					ends[i]->z = 0xFFFF;
					ends[i]->w = 0xFFFF;
				}
				*nan_endpoint = 1;
			}
		}
		else
		{
			// widen all four channels from 8 to 16 bits
			for (i = 0; i < 2; i++)
			{
				ends[i]->x *= 257;
				ends[i]->y *= 257;
				ends[i]->z *= 257;
				ends[i]->w *= 257;
			}
		}
		*rgb_hdr = 0;
		*alpha_hdr = 0;
		break;

	case DECODE_HDR:
		// widen only the channels that are LDR; HDR channels are left as decoded
		if (*rgb_hdr == 0)
		{
			for (i = 0; i < 2; i++)
			{
				ends[i]->x *= 257;
				ends[i]->y *= 257;
				ends[i]->z *= 257;
			}
		}
		if (*alpha_hdr == 0)
		{
			for (i = 0; i < 2; i++)
				ends[i]->w *= 257;
		}
		break;
	}
}

1792
3rdparty/astc/astc_compress_symbolic.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

524
3rdparty/astc/astc_compute_variance.cpp vendored Normal file
View File

@@ -0,0 +1,524 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief ASTC functions to calculate, for each pixel and each color component,
* its variance within an NxN footprint; we want N to be parametric.
*
* The routine below uses summed area tables in order to perform the
* computation in O(1) time per pixel, independent of how big N is.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
#include <math.h>
#include "mathlib.h"
#include "softfloat.h"
// Global result tables, defined in this file.
// NOTE(review): the triple pointers are presumably indexed [z][y][x] like the
// image data used below, and presumably filled by the variance routines in
// this file - allocation and ownership are not visible here; confirm.
float4 *** input_averages;
float *** input_alpha_averages;
float4 *** input_variances;
#include <stdio.h>
// routine to compute averages and variances for a pixel region.
// The routine computes both in a single pass, using a summed-area table
// to decouple the running time from the averaging/variance kernel size.
static void compute_pixel_region_variance(const astc_codec_image * img, float rgb_power_to_use, float alpha_power_to_use, swizzlepattern swz, int use_z_axis,
int source_xoffset,int source_yoffset, int source_zoffset, // position of upper-left pixel in data set
int xsize, int ysize, int zsize, // the size of the region to actually compute averages and variances for.
int avg_var_kernel_radius, int alpha_kernel_radius,
int dest_xoffset, int dest_yoffset, int dest_zoffset)
{
int x, y, z;
int kernel_radius = MAX(avg_var_kernel_radius, alpha_kernel_radius);
int kerneldim = 2 * kernel_radius + 1;
// allocate memory
int xpadsize = xsize + kerneldim;
int ypadsize = ysize + kerneldim;
int zpadsize = zsize + (use_z_axis ? kerneldim : 1);
double4 ***varbuf1 = new double4 **[zpadsize];
double4 ***varbuf2 = new double4 **[zpadsize];
varbuf1[0] = new double4 *[ypadsize * zpadsize];
varbuf2[0] = new double4 *[ypadsize * zpadsize];
varbuf1[0][0] = new double4[xpadsize * ypadsize * zpadsize];
varbuf2[0][0] = new double4[xpadsize * ypadsize * zpadsize];
for (z = 1; z < zpadsize; z++)
{
varbuf1[z] = varbuf1[0] + ypadsize * z;
varbuf2[z] = varbuf2[0] + ypadsize * z;
varbuf1[z][0] = varbuf1[0][0] + xpadsize * ypadsize * z;
varbuf2[z][0] = varbuf2[0][0] + xpadsize * ypadsize * z;
}
for (z = 0; z < zpadsize; z++)
for (y = 1; y < ypadsize; y++)
{
varbuf1[z][y] = varbuf1[z][0] + xpadsize * y;
varbuf2[z][y] = varbuf2[z][0] + xpadsize * y;
}
int powers_are_1 = (rgb_power_to_use == 1.0f) && (alpha_power_to_use == 1.0f);
// load x and x^2 values into the allocated buffers
if (img->imagedata8)
{
uint8_t data[6];
data[4] = 0;
data[5] = 255;
for (z = 0; z < zpadsize - 1; z++)
{
int z_src = z + source_zoffset - (use_z_axis ? kernel_radius : 0);
for (y = 0; y < ypadsize - 1; y++)
{
int y_src = y + source_yoffset - kernel_radius;
for (x = 0; x < xpadsize - 1; x++)
{
int x_src = x + source_xoffset - kernel_radius;
data[0] = img->imagedata8[z_src][y_src][4 * x_src + 0];
data[1] = img->imagedata8[z_src][y_src][4 * x_src + 1];
data[2] = img->imagedata8[z_src][y_src][4 * x_src + 2];
data[3] = img->imagedata8[z_src][y_src][4 * x_src + 3];
uint8_t r = data[swz.r];
uint8_t g = data[swz.g];
uint8_t b = data[swz.b];
uint8_t a = data[swz.a];
double4 d = double4(r * (1.0 / 255.0),
g * (1.0 / 255.0),
b * (1.0 / 255.0),
a * (1.0 / 255.0));
if (perform_srgb_transform)
{
d.x = (d.x <= 0.04045) ? d.x * (1.0 / 12.92) : (d.x <= 1) ? pow((d.x + 0.055) * (1.0 / 1.055), 2.4) : d.x;
d.y = (d.y <= 0.04045) ? d.y * (1.0 / 12.92) : (d.y <= 1) ? pow((d.y + 0.055) * (1.0 / 1.055), 2.4) : d.y;
d.z = (d.z <= 0.04045) ? d.z * (1.0 / 12.92) : (d.z <= 1) ? pow((d.z + 0.055) * (1.0 / 1.055), 2.4) : d.z;
}
if (!powers_are_1)
{
d.x = pow(MAX(d.x, 1e-6), (double)rgb_power_to_use);
d.y = pow(MAX(d.y, 1e-6), (double)rgb_power_to_use);
d.z = pow(MAX(d.z, 1e-6), (double)rgb_power_to_use);
d.w = pow(MAX(d.w, 1e-6), (double)alpha_power_to_use);
}
varbuf1[z][y][x] = d;
varbuf2[z][y][x] = d * d;
}
}
}
}
else
{
uint16_t data[6];
data[4] = 0;
data[5] = 0x3C00; // 1.0 encoded as FP16.
for (z = 0; z < zpadsize - 1; z++)
{
int z_src = z + source_zoffset - (use_z_axis ? kernel_radius : 0);
for (y = 0; y < ypadsize - 1; y++)
{
int y_src = y + source_yoffset - kernel_radius;
for (x = 0; x < xpadsize - 1; x++)
{
int x_src = x + source_xoffset - kernel_radius;
data[0] = img->imagedata16[z_src][y_src][4 * x_src];
data[1] = img->imagedata16[z_src][y_src][4 * x_src + 1];
data[2] = img->imagedata16[z_src][y_src][4 * x_src + 2];
data[3] = img->imagedata16[z_src][y_src][4 * x_src + 3];
uint16_t r = data[swz.r];
uint16_t g = data[swz.g];
uint16_t b = data[swz.b];
uint16_t a = data[swz.a];
double4 d = double4(sf16_to_float(r),
sf16_to_float(g),
sf16_to_float(b),
sf16_to_float(a));
if (perform_srgb_transform)
{
d.x = (d.x <= 0.04045) ? d.x * (1.0 / 12.92) : (d.x <= 1) ? pow((d.x + 0.055) * (1.0 / 1.055), 2.4) : d.x;
d.y = (d.y <= 0.04045) ? d.y * (1.0 / 12.92) : (d.y <= 1) ? pow((d.y + 0.055) * (1.0 / 1.055), 2.4) : d.y;
d.z = (d.z <= 0.04045) ? d.z * (1.0 / 12.92) : (d.z <= 1) ? pow((d.z + 0.055) * (1.0 / 1.055), 2.4) : d.z;
}
if (!powers_are_1)
{
d.x = pow(MAX(d.x, 1e-6), (double)rgb_power_to_use);
d.y = pow(MAX(d.y, 1e-6), (double)rgb_power_to_use);
d.z = pow(MAX(d.z, 1e-6), (double)rgb_power_to_use);
d.w = pow(MAX(d.w, 1e-6), (double)alpha_power_to_use);
}
varbuf1[z][y][x] = d;
varbuf2[z][y][x] = d * d;
}
}
}
}
// pad out buffers with 0s
for (z = 0; z < zpadsize; z++)
{
for (y = 0; y < ypadsize; y++)
{
varbuf1[z][y][xpadsize - 1] = double4(0.0, 0.0, 0.0, 0.0);
varbuf2[z][y][xpadsize - 1] = double4(0.0, 0.0, 0.0, 0.0);
}
for (x = 0; x < xpadsize; x++)
{
varbuf1[z][ypadsize - 1][x] = double4(0.0, 0.0, 0.0, 0.0);
varbuf2[z][ypadsize - 1][x] = double4(0.0, 0.0, 0.0, 0.0);
}
}
if (use_z_axis)
for (y = 0; y < ypadsize; y++)
for (x = 0; x < xpadsize; x++)
{
varbuf1[zpadsize - 1][y][x] = double4(0.0, 0.0, 0.0, 0.0);
varbuf2[zpadsize - 1][y][x] = double4(0.0, 0.0, 0.0, 0.0);
}
// generate summed-area tables for x and x2; this is done in-place
for (z = 0; z < zpadsize; z++)
for (y = 0; y < ypadsize; y++)
{
double4 summa1 = double4(0.0, 0.0, 0.0, 0.0);
double4 summa2 = double4(0.0, 0.0, 0.0, 0.0);
for (x = 0; x < xpadsize; x++)
{
double4 val1 = varbuf1[z][y][x];
double4 val2 = varbuf2[z][y][x];
varbuf1[z][y][x] = summa1;
varbuf2[z][y][x] = summa2;
summa1 = summa1 + val1;
summa2 = summa2 + val2;
}
}
for (z = 0; z < zpadsize; z++)
for (x = 0; x < xpadsize; x++)
{
double4 summa1 = double4(0.0, 0.0, 0.0, 0.0);
double4 summa2 = double4(0.0, 0.0, 0.0, 0.0);
for (y = 0; y < ypadsize; y++)
{
double4 val1 = varbuf1[z][y][x];
double4 val2 = varbuf2[z][y][x];
varbuf1[z][y][x] = summa1;
varbuf2[z][y][x] = summa2;
summa1 = summa1 + val1;
summa2 = summa2 + val2;
}
}
if (use_z_axis)
for (y = 0; y < ypadsize; y++)
for (x = 0; x < xpadsize; x++)
{
double4 summa1 = double4(0.0, 0.0, 0.0, 0.0);
double4 summa2 = double4(0.0, 0.0, 0.0, 0.0);
for (z = 0; z < zpadsize; z++)
{
double4 val1 = varbuf1[z][y][x];
double4 val2 = varbuf2[z][y][x];
varbuf1[z][y][x] = summa1;
varbuf2[z][y][x] = summa2;
summa1 = summa1 + val1;
summa2 = summa2 + val2;
}
}
int avg_var_kerneldim = 2 * avg_var_kernel_radius + 1;
int alpha_kerneldim = 2 * alpha_kernel_radius + 1;
// compute a few constants used in the variance-calculation.
double avg_var_samples;
double alpha_rsamples;
double mul1;
if (use_z_axis)
{
avg_var_samples = avg_var_kerneldim * avg_var_kerneldim * avg_var_kerneldim;
alpha_rsamples = 1.0 / (alpha_kerneldim * alpha_kerneldim * alpha_kerneldim);
}
else
{
avg_var_samples = avg_var_kerneldim * avg_var_kerneldim;
alpha_rsamples = 1.0 / (alpha_kerneldim * alpha_kerneldim);
}
double avg_var_rsamples = 1.0 / avg_var_samples;
if (avg_var_samples == 1)
mul1 = 1.0;
else
mul1 = 1.0 / (avg_var_samples * (avg_var_samples - 1));
double mul2 = avg_var_samples * mul1;
// use the summed-area tables to compute variance for each sample-neighborhood
if (use_z_axis)
{
for (z = 0; z < zsize; z++)
{
int z_src = z + kernel_radius;
int z_dst = z + dest_zoffset;
for (y = 0; y < ysize; y++)
{
int y_src = y + kernel_radius;
int y_dst = y + dest_yoffset;
for (x = 0; x < xsize; x++)
{
int x_src = x + kernel_radius;
int x_dst = x + dest_xoffset;
// summed-area table lookups for alpha average
double vasum =
(varbuf1[z_src + 1][y_src - alpha_kernel_radius][x_src - alpha_kernel_radius].w
- varbuf1[z_src + 1][y_src - alpha_kernel_radius][x_src + alpha_kernel_radius + 1].w
- varbuf1[z_src + 1][y_src + alpha_kernel_radius + 1][x_src - alpha_kernel_radius].w
+ varbuf1[z_src + 1][y_src + alpha_kernel_radius + 1][x_src + alpha_kernel_radius + 1].w) -
(varbuf1[z_src][y_src - alpha_kernel_radius][x_src - alpha_kernel_radius].w
- varbuf1[z_src][y_src - alpha_kernel_radius][x_src + alpha_kernel_radius + 1].w
- varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src - alpha_kernel_radius].w + varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src + alpha_kernel_radius + 1].w);
input_alpha_averages[z_dst][y_dst][x_dst] = static_cast < float >(vasum * alpha_rsamples);
// summed-area table lookups for RGBA average
double4 v0sum =
(varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius]
+ varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]) -
(varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]);
double4 avg = v0sum * avg_var_rsamples;
float4 favg = float4(static_cast < float >(avg.x),
static_cast < float >(avg.y),
static_cast < float >(avg.z),
static_cast < float >(avg.w));
input_averages[z_dst][y_dst][x_dst] = favg;
// summed-area table lookups for variance
double4 v1sum =
(varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius]
+ varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]) -
(varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]);
double4 v2sum =
(varbuf2[z_src + 1][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf2[z_src + 1][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf2[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius]
+ varbuf2[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]) -
(varbuf2[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf2[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]);
// the actual variance
double4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum);
float4 fvar = float4(static_cast < float >(variance.x),
static_cast < float >(variance.y),
static_cast < float >(variance.z),
static_cast < float >(variance.w));
input_variances[z_dst][y_dst][x_dst] = fvar;
}
}
}
}
else
{
for (z = 0; z < zsize; z++)
{
int z_src = z;
int z_dst = z + dest_zoffset;
for (y = 0; y < ysize; y++)
{
int y_src = y + kernel_radius;
int y_dst = y + dest_yoffset;
for (x = 0; x < xsize; x++)
{
int x_src = x + kernel_radius;
int x_dst = x + dest_xoffset;
// summed-area table lookups for alpha average
double vasum =
varbuf1[z_src][y_src - alpha_kernel_radius][x_src - alpha_kernel_radius].w
- varbuf1[z_src][y_src - alpha_kernel_radius][x_src + alpha_kernel_radius + 1].w
- varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src - alpha_kernel_radius].w + varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src + alpha_kernel_radius + 1].w;
input_alpha_averages[z_dst][y_dst][x_dst] = static_cast < float >(vasum * alpha_rsamples);
// summed-area table lookups for RGBA average
double4 v0sum =
varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1];
double4 avg = v0sum * avg_var_rsamples;
float4 favg = float4(static_cast < float >(avg.x),
static_cast < float >(avg.y),
static_cast < float >(avg.z),
static_cast < float >(avg.w));
input_averages[z_dst][y_dst][x_dst] = favg;
// summed-area table lookups for variance
double4 v1sum =
varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1];
double4 v2sum =
varbuf2[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
- varbuf2[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
- varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1];
// the actual variance
double4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum);
float4 fvar = float4(static_cast < float >(variance.x),
static_cast < float >(variance.y),
static_cast < float >(variance.z),
static_cast < float >(variance.w));
input_variances[z_dst][y_dst][x_dst] = fvar;
}
}
}
}
delete[]varbuf2[0][0];
delete[]varbuf1[0][0];
delete[]varbuf2[0];
delete[]varbuf1[0];
delete[]varbuf2;
delete[]varbuf1;
}
// (Re)allocate the three per-texel statistics buffers - input_averages,
// input_variances and input_alpha_averages - for an image of
// xsize * ysize * zsize texels, so that each can be indexed as buf[z][y][x].
//
// Every buffer is made of three allocations: a table with one entry per
// z-slice, a table with one row pointer per (z, y) pair, and one contiguous
// block holding all texel values. Buffers left over from an earlier call are
// released before the new ones are created.
static void allocate_input_average_and_variance_buffers(int xsize, int ysize, int zsize)
{
	// release the previous buffers, innermost allocation first.
	if (input_averages)
	{
		delete[] input_averages[0][0];
		delete[] input_averages[0];
		delete[] input_averages;
	}
	if (input_variances)
	{
		delete[] input_variances[0][0];
		delete[] input_variances[0];
		delete[] input_variances;
	}
	if (input_alpha_averages)
	{
		delete[] input_alpha_averages[0][0];
		delete[] input_alpha_averages[0];
		delete[] input_alpha_averages;
	}

	const int row_total = ysize * zsize;
	const int texel_total = xsize * ysize * zsize;
	const int texels_per_slice = ysize * xsize;

	// per-slice tables
	input_averages = new float4 **[zsize];
	input_variances = new float4 **[zsize];
	input_alpha_averages = new float **[zsize];

	// per-row tables, shared by all slices
	input_averages[0] = new float4 *[row_total];
	input_variances[0] = new float4 *[row_total];
	input_alpha_averages[0] = new float *[row_total];

	// the texel storage itself
	input_averages[0][0] = new float4[texel_total];
	input_variances[0][0] = new float4[texel_total];
	input_alpha_averages[0][0] = new float[texel_total];

	// hook every further slice up to its run of row pointers and to the
	// first row of its texel data; slice 0 was set up by the allocations above.
	for (int slice = 1; slice < zsize; slice++)
	{
		input_averages[slice] = input_averages[slice - 1] + ysize;
		input_variances[slice] = input_variances[slice - 1] + ysize;
		input_alpha_averages[slice] = input_alpha_averages[slice - 1] + ysize;

		input_averages[slice][0] = input_averages[slice - 1][0] + texels_per_slice;
		input_variances[slice][0] = input_variances[slice - 1][0] + texels_per_slice;
		input_alpha_averages[slice][0] = input_alpha_averages[slice - 1][0] + texels_per_slice;
	}

	// within each slice, consecutive rows lie xsize texels apart.
	for (int slice = 0; slice < zsize; slice++)
	{
		for (int row = 1; row < ysize; row++)
		{
			input_averages[slice][row] = input_averages[slice][row - 1] + xsize;
			input_variances[slice][row] = input_variances[slice][row - 1] + xsize;
			input_alpha_averages[slice][row] = input_alpha_averages[slice][row - 1] + xsize;
		}
	}
}
// Compute averages and variances for the current input image.
//
// The statistics buffers are (re)allocated for the image dimensions, after
// which the image is walked in tiles of at most 32 texels along each axis and
// compute_pixel_region_variance() is invoked once per tile. The source offsets
// handed to it are shifted by img->padding (along z only when the image has
// more than one slice); the destination offsets are the plain tile origin.
void compute_averages_and_variances(const astc_codec_image * img, float rgb_power_to_use, float alpha_power_to_use, int avg_var_kernel_radius, int alpha_kernel_radius, swizzlepattern swz)
{
	const int tile_extent = 32;
	const int image_width = img->xsize;
	const int image_height = img->ysize;
	const int image_depth = img->zsize;

	allocate_input_average_and_variance_buffers(image_width, image_height, image_depth);

	for (int tile_z = 0; tile_z < image_depth; tile_z += tile_extent)
	{
		const int tile_depth = MIN(tile_extent, image_depth - tile_z);

		for (int tile_y = 0; tile_y < image_height; tile_y += tile_extent)
		{
			const int tile_height = MIN(tile_extent, image_height - tile_y);

			for (int tile_x = 0; tile_x < image_width; tile_x += tile_extent)
			{
				const int tile_width = MIN(tile_extent, image_width - tile_x);

				compute_pixel_region_variance(img,
					rgb_power_to_use,
					alpha_power_to_use,
					swz,
					(image_depth > 1),
					tile_x + img->padding,
					tile_y + img->padding,
					tile_z + (image_depth > 1 ? img->padding : 0),
					tile_width, tile_height, tile_depth,
					avg_var_kernel_radius, alpha_kernel_radius,
					tile_x, tile_y, tile_z);
			}
		}
	}
}

View File

@@ -0,0 +1,317 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Decompress a block of colors, expressed as a symbolic block,
* for ASTC.
*/
/*----------------------------------------------------------------------------*/
#include <math.h>
#include "astc_codec_internals.h"
#include "softfloat.h"
#include <stdio.h>
// Reconstruct the integer weight of a single texel from the stored weight grid.
//
// texel_to_get  index of the texel within the block.
// it            decimation table listing, for each texel, which stored weights
//               contribute to it and with which integer factor.
// weights       the unquantized stored weights.
//
// Returns the sum of the contributing weights multiplied by their factors,
// divided by 16 with rounding to nearest.
int compute_value_of_texel_int(int texel_to_get, const decimation_table * it, const int *weights)
{
	const int contributor_count = it->texel_num_weights[texel_to_get];

	// start from 8 (half of 16) so that the final shift rounds instead of truncating.
	int accumulator = 8;

	for (int k = 0; k < contributor_count; k++)
	{
		const int weight_index = it->texel_weights[texel_to_get][k];
		accumulator += weights[weight_index] * it->texel_weights_int[texel_to_get][k];
	}

	return accumulator >> 4;
}
// Interpolate between two endpoint colors using integer weights in the
// range 0..64.
//
// decode_mode             for DECODE_LDR_SRGB the interpolation is done on the
//                         top 8 bits of each component and the 8-bit result is
//                         replicated into both bytes of the output.
// color0, color1          the two endpoint colors.
// weight                  weight applied to all components...
// plane2_weight           ...except the one selected by plane2_color_component,
//                         which uses this weight instead.
// plane2_color_component  0..3 to select x/y/z/w; -1 in 1-plane mode.
ushort4 lerp_color_int(astc_decode_mode decode_mode, ushort4 color0, ushort4 color1, int weight, int plane2_weight, int plane2_color_component)
{
	const int srgb_decode = (decode_mode == DECODE_LDR_SRGB);

	// widen the endpoints to int for the arithmetic below.
	int4 endpoint_lo = int4(color0.x, color0.y, color0.z, color0.w);
	int4 endpoint_hi = int4(color1.x, color1.y, color1.z, color1.w);
	if (srgb_decode)
	{
		endpoint_lo = endpoint_lo >> 8;
		endpoint_hi = endpoint_hi >> 8;
	}

	// per-component weight of the second endpoint; the component assigned to
	// the second weight plane (if any) gets its own weight.
	int4 weight_hi = int4(weight, weight, weight, weight);
	if (plane2_color_component == 0)
		weight_hi.x = plane2_weight;
	else if (plane2_color_component == 1)
		weight_hi.y = plane2_weight;
	else if (plane2_color_component == 2)
		weight_hi.z = plane2_weight;
	else if (plane2_color_component == 3)
		weight_hi.w = plane2_weight;

	int4 weight_lo = int4(64, 64, 64, 64) - weight_hi;

	// weighted sum, plus 32 so that the division by 64 rounds to nearest.
	int4 blended = (endpoint_lo * weight_lo) + (endpoint_hi * weight_hi) + int4(32, 32, 32, 32);
	blended = blended >> 6;

	if (srgb_decode)
		blended = blended | (blended << 8);

	return ushort4(blended.x, blended.y, blended.z, blended.w);
}
// Decompress one symbolic compressed block into an imageblock.
//
// decode_mode       DECODE_LDR_SRGB, DECODE_LDR or DECODE_HDR.
// xdim, ydim, zdim  dimensions of the block, in texels.
// xpos, ypos, zpos  position of the block; stored into blk.
// scb               the symbolic block to decode.
// blk               receives the decoded texels together with the per-texel
//                   rgb_lns / alpha_lns / nan_texel flags.
//
// Three kinds of block are handled:
//   * error blocks (scb->error_block set): filled with opaque magenta in sRGB
//     mode, or with zero texels flagged as NaN in every other mode.
//   * constant-color blocks (scb->block_mode < 0).
//   * regular blocks: endpoints are unpacked, weights are unquantized and
//     undecimated, and every texel is interpolated with lerp_color_int().
void decompress_symbolic_block(astc_decode_mode decode_mode,
	int xdim, int ydim, int zdim,	// dimensions of block
	int xpos, int ypos, int zpos,	// position of block
	const symbolic_compressed_block * scb, imageblock * blk)
{
	blk->xpos = xpos;
	blk->ypos = ypos;
	blk->zpos = zpos;

	int i;
	const int texels_per_block = xdim * ydim * zdim;

	// if we detected an error-block, blow up immediately.
	if (scb->error_block)
	{
		if (decode_mode == DECODE_LDR_SRGB)
		{
			// sRGB decoding: the error color is opaque magenta.
			for (i = 0; i < texels_per_block; i++)
			{
				blk->orig_data[4 * i] = 1.0f;
				blk->orig_data[4 * i + 1] = 0.0f;
				blk->orig_data[4 * i + 2] = 1.0f;
				blk->orig_data[4 * i + 3] = 1.0f;
				blk->rgb_lns[i] = 0;
				blk->alpha_lns[i] = 0;
				blk->nan_texel[i] = 0;
			}
		}
		else
		{
			// any other mode: zero texels, each flagged as NaN.
			for (i = 0; i < texels_per_block; i++)
			{
				blk->orig_data[4 * i] = 0.0f;
				blk->orig_data[4 * i + 1] = 0.0f;
				blk->orig_data[4 * i + 2] = 0.0f;
				blk->orig_data[4 * i + 3] = 0.0f;
				blk->rgb_lns[i] = 0;
				blk->alpha_lns[i] = 0;
				blk->nan_texel[i] = 1;
			}
		}

		imageblock_initialize_work_from_orig(blk, texels_per_block);
		update_imageblock_flags(blk, xdim, ydim, zdim);
		return;
	}

	// negative block modes denote constant-color blocks.
	if (scb->block_mode < 0)
	{
		float red = 0, green = 0, blue = 0, alpha = 0;
		int use_lns = 0;
		int use_nan = 0;

		if (scb->block_mode == -2)
		{
			// constant color stored as UNORM16.
			// For sRGB decoding, we should return only the top 8 bits.
			int mask = (decode_mode == DECODE_LDR_SRGB) ? 0xFF00 : 0xFFFF;

			red = sf16_to_float(unorm16_to_sf16(scb->constant_color[0] & mask));
			green = sf16_to_float(unorm16_to_sf16(scb->constant_color[1] & mask));
			blue = sf16_to_float(unorm16_to_sf16(scb->constant_color[2] & mask));
			alpha = sf16_to_float(unorm16_to_sf16(scb->constant_color[3] & mask));
			use_lns = 0;
			use_nan = 0;
		}
		else
		{
			// constant color stored as FP16: only the HDR mode decodes it; the
			// two LDR modes produce the same error colors as an error block.
			switch (decode_mode)
			{
			case DECODE_LDR_SRGB:
				red = 1.0f;
				green = 0.0f;
				blue = 1.0f;
				alpha = 1.0f;
				use_lns = 0;
				use_nan = 0;
				break;
			case DECODE_LDR:
				red = 0.0f;
				green = 0.0f;
				blue = 0.0f;
				alpha = 0.0f;
				use_lns = 0;
				use_nan = 1;
				break;
			case DECODE_HDR:
				// constant-color block; unpack from FP16 to FP32.
				red = sf16_to_float(scb->constant_color[0]);
				green = sf16_to_float(scb->constant_color[1]);
				blue = sf16_to_float(scb->constant_color[2]);
				alpha = sf16_to_float(scb->constant_color[3]);
				use_lns = 1;
				use_nan = 0;
				break;
			}
		}

		for (i = 0; i < texels_per_block; i++)
		{
			blk->orig_data[4 * i] = red;
			blk->orig_data[4 * i + 1] = green;
			blk->orig_data[4 * i + 2] = blue;
			blk->orig_data[4 * i + 3] = alpha;
			blk->rgb_lns[i] = use_lns;
			blk->alpha_lns[i] = use_lns;
			blk->nan_texel[i] = use_nan;
		}

		imageblock_initialize_work_from_orig(blk, texels_per_block);
		update_imageblock_flags(blk, xdim, ydim, zdim);
		return;
	}

	// get the appropriate partition-table entry
	int partition_count = scb->partition_count;
	const partition_info *pt = get_partition_table(xdim, ydim, zdim, partition_count);
	pt += scb->partition_index;

	// get the appropriate block descriptor
	const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
	const decimation_table *const *ixtab2 = bsd->decimation_tables;

	const decimation_table *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];

	int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;

	int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;

	// decode the color endpoints
	ushort4 color_endpoint0[4];
	ushort4 color_endpoint1[4];
	int rgb_hdr_endpoint[4];
	int alpha_hdr_endpoint[4];
	int nan_endpoint[4];

	for (i = 0; i < partition_count; i++)
		unpack_color_endpoints(decode_mode,
			scb->color_formats[i],
			scb->color_quantization_level, scb->color_values[i], &(rgb_hdr_endpoint[i]), &(alpha_hdr_endpoint[i]), &(nan_endpoint[i]), &(color_endpoint0[i]), &(color_endpoint1[i]));

	// first unquantize the weights
	int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
	int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
	int weight_count = it->num_weights;

	const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]);

	for (i = 0; i < weight_count; i++)
	{
		uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]];
	}
	if (is_dual_plane)
	{
		for (i = 0; i < weight_count; i++)
			uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]];
	}

	// then undecimate them.
	int weights[MAX_TEXELS_PER_BLOCK];
	int plane2_weights[MAX_TEXELS_PER_BLOCK];

	for (i = 0; i < texels_per_block; i++)
		weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights);

	// the second weight plane only exists (and is only filled in) for dual-plane blocks.
	if (is_dual_plane)
		for (i = 0; i < texels_per_block; i++)
			plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights);

	int plane2_color_component = scb->plane2_color_component;

	// now that we have endpoint colors and weights, we can unpack actual colors for
	// each texel.
	for (i = 0; i < texels_per_block; i++)
	{
		int partition = pt->partition_of_texel[i];

		// plane2_weights[] is uninitialized for single-plane blocks, so it must
		// not be read in that case. lerp_color_int() ignores the second weight
		// when the component is -1, so passing 0 does not change the result.
		int second_plane_weight = is_dual_plane ? plane2_weights[i] : 0;

		ushort4 color = lerp_color_int(decode_mode,
			color_endpoint0[partition],
			color_endpoint1[partition],
			weights[i],
			second_plane_weight,
			is_dual_plane ? plane2_color_component : -1);

		blk->rgb_lns[i] = rgb_hdr_endpoint[partition];
		blk->alpha_lns[i] = alpha_hdr_endpoint[partition];
		blk->nan_texel[i] = nan_endpoint[partition];

		blk->work_data[4 * i] = color.x;
		blk->work_data[4 * i + 1] = color.y;
		blk->work_data[4 * i + 2] = color.z;
		blk->work_data[4 * i + 3] = color.w;
	}

	imageblock_initialize_orig_from_work(blk, texels_per_block);

	update_imageblock_flags(blk, xdim, ydim, zdim);
}
// Compute the error-weighted sum of squared differences between the work data
// of two image blocks.
//
// xdim, ydim, zdim  dimensions of the block, in texels.
// p1, p2            the two blocks to compare; their work_data arrays are read
//                   as four consecutive floats per texel.
// ewb               per-texel, per-component error weights.
//
// Each per-component absolute difference is clamped to 1e15 before it is
// squared, which keeps the square within the range of a float.
float compute_imageblock_difference(int xdim, int ydim, int zdim, const imageblock * p1, const imageblock * p2, const error_weight_block * ewb)
{
	int i;
	int texels_per_block = xdim * ydim * zdim;
	float summa = 0.0f;
	const float *f1 = p1->work_data;
	const float *f2 = p2->work_data;

	for (i = 0; i < texels_per_block; i++)
	{
		// fabsf() for all four components: the operands are floats, so there is
		// no reason to go through the double-precision fabs().
		float rdiff = fabsf(f1[4 * i] - f2[4 * i]);
		float gdiff = fabsf(f1[4 * i + 1] - f2[4 * i + 1]);
		float bdiff = fabsf(f1[4 * i + 2] - f2[4 * i + 2]);
		float adiff = fabsf(f1[4 * i + 3] - f2[4 * i + 3]);

		rdiff = MIN(rdiff, 1e15f);
		gdiff = MIN(gdiff, 1e15f);
		bdiff = MIN(bdiff, 1e15f);
		adiff = MIN(adiff, 1e15f);

		summa += rdiff * rdiff * ewb->error_weights[i].x + gdiff * gdiff * ewb->error_weights[i].y + bdiff * bdiff * ewb->error_weights[i].z + adiff * adiff * ewb->error_weights[i].w;
	}

	return summa;
}

View File

@@ -0,0 +1,310 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Determine color errors for ASTC compression.
*
* We assume that there are two independent sources of color error in
* any given partition.
*
* These are:
* * quantization errors
* * encoding choice errors
*
* Encoding choice errors are errors that come due to encoding choice,
* such as:
* * using luminance instead of RGB
* * using RGB-scale instead of two RGB endpoints.
* * dropping Alpha
*
* Quantization errors occur due to the limited precision we use for
* storing numbers.
*
* Quantization errors generally scale with quantization level, but are
* not actually independent of color encoding. In particular:
* * if we can use offset encoding then quantization error is halved.
* * if we can use blue-contraction, quantization error for red and
* green is halved.
* * quantization error is higher for the HDR endpoint modes.
*
* Other than these errors, quantization error is assumed to be
* proportional to the quantization step.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
#include <math.h>
#ifdef DEBUG_PRINT_DIAGNOSTICS
#include <stdio.h>
#endif
// Helper that merges two sets of endpoint colors into one.
//
// ep1                 supplies three of the four color components.
// ep2                 supplies the remaining component.
// separate_component  index (0..3 for x/y/z/w) of the component taken from
//                     ep2; for any other value the result is a copy of ep1.
// res                 receives the merged endpoints; its partition count is
//                     taken from ep1.
void merge_endpoints(const endpoints * ep1, const endpoints * ep2, int separate_component, endpoints * res)
{
	const int partition_count = ep1->partition_count;
	res->partition_count = partition_count;

	for (int part = 0; part < partition_count; part++)
	{
		// start out from the ep1 colors...
		res->endpt0[part] = ep1->endpt0[part];
		res->endpt1[part] = ep1->endpt1[part];

		// ...then overwrite the separated component with the one from ep2.
		if (separate_component == 0)
		{
			res->endpt0[part].x = ep2->endpt0[part].x;
			res->endpt1[part].x = ep2->endpt1[part].x;
		}
		else if (separate_component == 1)
		{
			res->endpt0[part].y = ep2->endpt0[part].y;
			res->endpt1[part].y = ep2->endpt1[part].y;
		}
		else if (separate_component == 2)
		{
			res->endpt0[part].z = ep2->endpt0[part].z;
			res->endpt1[part].z = ep2->endpt1[part].z;
		}
		else if (separate_component == 3)
		{
			res->endpt0[part].w = ep2->endpt0[part].w;
			res->endpt1[part].w = ep2->endpt1[part].w;
		}
	}
}
/*
For a given set of input colors and a given partitioning, determine for each partition:
* the color error that results from RGB-scale encoding (relevant for LDR only)
* the color error that results from RGB-lumashift encoding (relevant for HDR only)
* the color error that results from luminance-encoding
* the color error that results from dropping alpha
* the color error that results from dropping RGB
* whether we are eligible for offset encoding
* whether we are eligible for blue-contraction

The input data are:
* color data (pb)
* partitioning (pi)
* error-weight data (ewb)

One entry of eci[] is written for each partition of pi.
*/
void compute_encoding_choice_errors(int xdim, int ydim, int zdim, const imageblock * pb, const partition_info * pi, const error_weight_block * ewb,
int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode
encoding_choice_errors * eci)
{
int i;
int partition_count = pi->partition_count;
int texels_per_block = xdim * ydim * zdim;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
{
printf("%s : texels-per-block=%dx%dx%d, separate_component=%d, partition-count=%d\n", __func__, xdim, ydim, zdim, separate_component, partition_count);
}
#endif
// per-partition scratch data; all arrays are sized for the maximum of 4 partitions.
float3 averages[4];
float3 directions_rgb[4];
float2 directions_rg[4];
float2 directions_rb[4];
float2 directions_gb[4];
float4 error_weightings[4];
float4 color_scalefactors[4];
float4 inverse_color_scalefactors[4];
// the two helpers below fill in the error weightings / scale factors and the
// average colors / direction vectors (presumably one entry per partition).
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pi, error_weightings, color_scalefactors);
compute_averages_and_directions_rgb(pi, pb, ewb, color_scalefactors, averages, directions_rgb, directions_rg, directions_rb, directions_gb);
// four candidate lines in RGB space per partition; 'a' is a point on the line, 'b' its direction.
line3 uncorr_rgb_lines[4];
line3 samechroma_rgb_lines[4]; // for LDR-RGB-scale
line3 rgb_luma_lines[4]; // for HDR-RGB-scale
line3 luminance_lines[4];
processed_line3 proc_uncorr_rgb_lines[4];
processed_line3 proc_samechroma_rgb_lines[4]; // for LDR-RGB-scale
processed_line3 proc_rgb_luma_lines[4]; // for HDR-RGB-scale
processed_line3 proc_luminance_lines[4];
for (i = 0; i < partition_count; i++)
{
// reciprocal scale factors; the clamp to 1e-7 guards against division by zero.
inverse_color_scalefactors[i].x = 1.0f / MAX(color_scalefactors[i].x, 1e-7f);
inverse_color_scalefactors[i].y = 1.0f / MAX(color_scalefactors[i].y, 1e-7f);
inverse_color_scalefactors[i].z = 1.0f / MAX(color_scalefactors[i].z, 1e-7f);
inverse_color_scalefactors[i].w = 1.0f / MAX(color_scalefactors[i].w, 1e-7f);
// uncorrelated line: through the average color, along the computed RGB direction;
// if that direction has zero length, fall back to the normalized scale factors.
uncorr_rgb_lines[i].a = averages[i];
if (dot(directions_rgb[i], directions_rgb[i]) == 0.0f)
uncorr_rgb_lines[i].b = normalize(float3(color_scalefactors[i].xyz));
else
uncorr_rgb_lines[i].b = normalize(directions_rgb[i]);
// same-chroma line: through the origin, towards the average color;
// if the average is (nearly) zero, fall back to the normalized scale factors.
samechroma_rgb_lines[i].a = float3(0, 0, 0);
if (dot(averages[i], averages[i]) < 1e-20)
samechroma_rgb_lines[i].b = normalize(float3(color_scalefactors[i].xyz));
else
samechroma_rgb_lines[i].b = normalize(averages[i]);
// RGB-luma line: through the average color, along the normalized scale factors.
rgb_luma_lines[i].a = averages[i];
rgb_luma_lines[i].b = normalize(color_scalefactors[i].xyz);
// luminance line: through the origin, along the normalized scale factors.
luminance_lines[i].a = float3(0, 0, 0);
luminance_lines[i].b = normalize(color_scalefactors[i].xyz);
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
{
printf("Partition %d\n", i);
printf("Average = <%g %g %g>\n", averages[i].x, averages[i].y, averages[i].z);
printf("Uncorr-rgb-line = <%g %g %g> + t<%g %g %g>\n",
uncorr_rgb_lines[i].a.x, uncorr_rgb_lines[i].a.y, uncorr_rgb_lines[i].a.z, uncorr_rgb_lines[i].b.x, uncorr_rgb_lines[i].b.y, uncorr_rgb_lines[i].b.z);
printf("Samechroma-line = t<%g %g %g>\n", samechroma_rgb_lines[i].b.x, samechroma_rgb_lines[i].b.y, samechroma_rgb_lines[i].b.z);
}
#endif
// convert every line into its processed form:
//   amod = the part of 'a' that is perpendicular to 'b', times the inverse scale factors
//   bs   = 'b' times the scale factors
//   bis  = 'b' times the inverse scale factors
proc_uncorr_rgb_lines[i].amod = (uncorr_rgb_lines[i].a - uncorr_rgb_lines[i].b * dot(uncorr_rgb_lines[i].a, uncorr_rgb_lines[i].b)) * inverse_color_scalefactors[i].xyz;
proc_uncorr_rgb_lines[i].bs = uncorr_rgb_lines[i].b * color_scalefactors[i].xyz;
proc_uncorr_rgb_lines[i].bis = uncorr_rgb_lines[i].b * inverse_color_scalefactors[i].xyz;
proc_samechroma_rgb_lines[i].amod = (samechroma_rgb_lines[i].a - samechroma_rgb_lines[i].b * dot(samechroma_rgb_lines[i].a, samechroma_rgb_lines[i].b)) * inverse_color_scalefactors[i].xyz;
proc_samechroma_rgb_lines[i].bs = samechroma_rgb_lines[i].b * color_scalefactors[i].xyz;
proc_samechroma_rgb_lines[i].bis = samechroma_rgb_lines[i].b * inverse_color_scalefactors[i].xyz;
proc_rgb_luma_lines[i].amod = (rgb_luma_lines[i].a - rgb_luma_lines[i].b * dot(rgb_luma_lines[i].a, rgb_luma_lines[i].b)) * inverse_color_scalefactors[i].xyz;
proc_rgb_luma_lines[i].bs = rgb_luma_lines[i].b * color_scalefactors[i].xyz;
proc_rgb_luma_lines[i].bis = rgb_luma_lines[i].b * inverse_color_scalefactors[i].xyz;
proc_luminance_lines[i].amod = (luminance_lines[i].a - luminance_lines[i].b * dot(luminance_lines[i].a, luminance_lines[i].b)) * inverse_color_scalefactors[i].xyz;
proc_luminance_lines[i].bs = luminance_lines[i].b * color_scalefactors[i].xyz;
proc_luminance_lines[i].bis = luminance_lines[i].b * inverse_color_scalefactors[i].xyz;
}
// squared error of each partition's texels against each of the four candidate lines.
float uncorr_rgb_error[4];
float samechroma_rgb_error[4];
float rgb_luma_error[4];
float luminance_rgb_error[4];
for (i = 0; i < partition_count; i++)
{
uncorr_rgb_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_uncorr_rgb_lines[i]));
samechroma_rgb_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_samechroma_rgb_lines[i]));
rgb_luma_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_rgb_luma_lines[i]));
luminance_rgb_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_luminance_lines[i]));
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
{
printf("Partition %d : uncorr-error=%g samechroma-error=%g rgb-luma-error=%g lum-error=%g\n",
i, uncorr_rgb_error[i], samechroma_rgb_error[i], rgb_luma_error[i], luminance_rgb_error[i]);
}
#endif
}
// compute the error that arises from just ditching alpha and RGB
float alpha_drop_error[4];
float rgb_drop_error[4];
for (i = 0; i < partition_count; i++)
{
alpha_drop_error[i] = 0;
rgb_drop_error[i] = 0;
}
for (i = 0; i < texels_per_block; i++)
{
int partition = pi->partition_of_texel[i];
float alpha = pb->work_data[4 * i + 3];
// the alpha value assumed when alpha is dropped depends on whether the texel's
// alpha is flagged as LNS (presumably both constants encode 1.0 -- TODO confirm).
float default_alpha = pb->alpha_lns[i] ? (float)0x7800 : (float)0xFFFF;
float omalpha = alpha - default_alpha;
alpha_drop_error[partition] += omalpha * omalpha * ewb->error_weights[i].w;
// dropping RGB is scored as the weighted squared magnitude of the RGB value itself.
float red = pb->work_data[4 * i];
float green = pb->work_data[4 * i + 1];
float blue = pb->work_data[4 * i + 2];
rgb_drop_error[partition] += red * red * ewb->error_weights[i].x + green * green * ewb->error_weights[i].y + blue * blue * ewb->error_weights[i].z;
}
// check if we are eligible for blue-contraction and offset-encoding
endpoints ep;
if (separate_component == -1)
{
// 1-plane mode: a single set of endpoints covers all components.
endpoints_and_weights ei;
compute_endpoints_and_ideal_weights_1_plane(xdim, ydim, zdim, pi, pb, ewb, &ei);
ep = ei.ep;
}
else
{
// 2-plane mode: merge the endpoints of the two planes into one set.
endpoints_and_weights ei1, ei2;
compute_endpoints_and_ideal_weights_2_planes(xdim, ydim, zdim, pi, pb, ewb, separate_component, &ei1, &ei2);
merge_endpoints(&(ei1.ep), &(ei2.ep), separate_component, &ep);
}
int eligible_for_offset_encode[4];
int eligible_for_blue_contraction[4];
for (i = 0; i < partition_count; i++)
{
float4 endpt0 = ep.endpt0[i];
float4 endpt1 = ep.endpt1[i];
float4 endpt_dif = endpt1 - endpt0;
// offset encoding: the R, G and B endpoint differences must all be below 12% of 65535.
if (fabs(endpt_dif.x) < (0.12 * 65535.0f) && fabs(endpt_dif.y) < (0.12 * 65535.0f) && fabs(endpt_dif.z) < (0.12 * 65535.0f))
eligible_for_offset_encode[i] = 1;
else
eligible_for_offset_encode[i] = 0;
// blue contraction: after moving red and green away from blue by their distance to it,
// both must stay strictly between 1% and 99% of 65535 for both endpoints.
endpt0.x += (endpt0.x - endpt0.z);
endpt0.y += (endpt0.y - endpt0.z);
endpt1.x += (endpt1.x - endpt1.z);
endpt1.y += (endpt1.y - endpt1.z);
if (endpt0.x > (0.01f * 65535.0f) && endpt0.x < (0.99f * 65535.0f)
&& endpt1.x > (0.01f * 65535.0f) && endpt1.x < (0.99f * 65535.0f)
&& endpt0.y > (0.01f * 65535.0f) && endpt0.y < (0.99f * 65535.0f) && endpt1.y > (0.01f * 65535.0f) && endpt1.y < (0.99f * 65535.0f))
eligible_for_blue_contraction[i] = 1;
else
eligible_for_blue_contraction[i] = 0;
}
// finally, gather up our results
// (the line-based errors are expressed relative to the uncorrelated-line error.)
for (i = 0; i < partition_count; i++)
{
eci[i].rgb_scale_error = (samechroma_rgb_error[i] - uncorr_rgb_error[i]) * 0.7f; // empirical
eci[i].rgb_luma_error = (rgb_luma_error[i] - uncorr_rgb_error[i]) * 1.5f; // wild guess
eci[i].luminance_error = (luminance_rgb_error[i] - uncorr_rgb_error[i]) * 3.0f; // empirical
eci[i].alpha_drop_error = alpha_drop_error[i] * 3.0f;
eci[i].rgb_drop_error = rgb_drop_error[i] * 3.0f;
eci[i].can_offset_encode = eligible_for_offset_encode[i];
eci[i].can_blue_contract = eligible_for_blue_contraction[i];
}
}

View File

@@ -0,0 +1,865 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief ASTC encoding of texture
*
* major step 1:
* * find best partitioning assuming uncorrelated colors
* * find best partitioning assuming RGBS color representation
*
* finding best partitioning for a block:
* * for each available partitioning:
* * compute mean-color-value and dominant direction.
* * this defines two lines, both of which go through the
* mean-color-value:
* * one line has a direction defined by the dominant direction;
* this line is used to assess the error from using an uncorrelated
* color representation.
* * the other line goes through (0,0,0,1) and is used to assess the
* error from using an RGBS color representation.
* * we then compute, as a sum across the block, the squared-errors
* that result from using the dominant-direction-lines and the
* squared-errors that result from using the 0001-lines.
*/
/*----------------------------------------------------------------------------*/
/*
* Partition table representation:
* We have 3 tables, each with 1024 partitionings
* (these correspond to the 3x128 hardware partitionings crossed with all the
* partition-transform modes in the hardware.)
*
* For each partitioning, we have:
* * a 4-entry table indicating how many texels there are in each of the 4
* partitions. this may be from 2 to about 60 or so.
* * a 64-entry table indicating the partition index of each of the 64 texels
* in the block. each index may be 0, 1, 2 or 3.
*
* each element in the table is a uint8_t holding a partition index (0, 1, 2 or 3)
*/
#include <math.h>
#include "astc_codec_internals.h"
#ifdef DEBUG_PRINT_DIAGNOSTICS
#include <stdio.h>
#endif
#include "mathlib.h"
// Returns nonzero when the block's recorded alpha minimum and maximum differ,
// i.e. when the alpha channel is not constant across the block.
// The block dimensions are accepted for interface uniformity only.
int imageblock_uses_alpha(int xdim, int ydim, int zdim, const imageblock * pb)
{
	IGNORE(zdim);
	IGNORE(ydim);
	IGNORE(xdim);

	const bool alpha_is_constant = (pb->alpha_min == pb->alpha_max);
	return alpha_is_constant ? 0 : 1;
}
/*
	Per-partition alpha range of a block.

	For each of the pt->partition_count partitions, alpha_min[]/alpha_max[] receive the
	smallest and largest alpha value (component 3 of blk->work_data) found among the texels
	assigned to that partition. Texels whose ewb->texel_weight is not above 1e-10 are ignored.
	A partition that ends up with an empty or zero-width range is given the range [0, 1e-10].
*/
static void compute_alpha_minmax(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, float *alpha_min, float *alpha_max)
{
	const int num_partitions = pt->partition_count;
	const int num_texels = xdim * ydim * zdim;

	// start every partition with an inverted (empty) range
	for (int p = 0; p < num_partitions; p++)
	{
		alpha_min[p] = 1e38f;
		alpha_max[p] = -1e38f;
	}

	// widen the range of the owning partition with every significant texel
	for (int t = 0; t < num_texels; t++)
	{
		if (!(ewb->texel_weight[t] > 1e-10))
			continue;

		const int p = pt->partition_of_texel[t];
		const float alpha = blk->work_data[4 * t + 3];

		if (alpha > alpha_max[p])
			alpha_max[p] = alpha;
		if (alpha < alpha_min[p])
			alpha_min[p] = alpha;
	}

	// replace degenerate ranges by a tiny non-empty one
	for (int p = 0; p < num_partitions; p++)
	{
		const bool degenerate = alpha_min[p] >= alpha_max[p];
		if (degenerate)
		{
			alpha_min[p] = 0;
			alpha_max[p] = 1e-10f;
		}
	}
}
/*
	Per-partition red/green/blue ranges of a block.

	For each partition, the *_min[]/*_max[] arrays receive the smallest and largest value of
	the corresponding component (components 0, 1, 2 of blk->work_data) among the texels of
	that partition whose ewb->texel_weight exceeds 1e-10. Any component range that ends up
	empty or zero-width is replaced by [0, 1e-10].
*/
static void compute_rgb_minmax(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb,
							   float *red_min, float *red_max, float *green_min, float *green_max, float *blue_min, float *blue_max)
{
	// channel-indexed views of the six output arrays (0 = red, 1 = green, 2 = blue)
	float *const lows[3] = { red_min, green_min, blue_min };
	float *const highs[3] = { red_max, green_max, blue_max };

	const int num_partitions = pt->partition_count;
	const int num_texels = xdim * ydim * zdim;

	// start every range inverted (empty)
	for (int p = 0; p < num_partitions; p++)
	{
		for (int c = 0; c < 3; c++)
		{
			lows[c][p] = 1e38f;
			highs[c][p] = -1e38f;
		}
	}

	// widen the ranges of the owning partition with every significant texel
	for (int t = 0; t < num_texels; t++)
	{
		if (!(ewb->texel_weight[t] > 1e-10f))
			continue;

		const int p = pt->partition_of_texel[t];
		const float *texel = blk->work_data + 4 * t;

		for (int c = 0; c < 3; c++)
		{
			const float value = texel[c];
			if (value > highs[c][p])
				highs[c][p] = value;
			if (value < lows[c][p])
				lows[c][p] = value;
		}
	}

	// replace degenerate ranges by a tiny non-empty one
	for (int p = 0; p < num_partitions; p++)
	{
		for (int c = 0; c < 3; c++)
		{
			if (lows[c][p] >= highs[c][p])
			{
				lows[c][p] = 0.0f;
				highs[c][p] = 1e-10f;
			}
		}
	}
}
void compute_partition_error_color_weightings(int xdim, int ydim, int zdim, const error_weight_block * ewb, const partition_info * pi, float4 error_weightings[4], float4 color_scalefactors[4])
{
int i;
int texels_per_block = xdim * ydim * zdim;
int pcnt = pi->partition_count;
for (i = 0; i < pcnt; i++)
error_weightings[i] = float4(1e-12f, 1e-12f, 1e-12f, 1e-12f);
for (i = 0; i < texels_per_block; i++)
{
int part = pi->partition_of_texel[i];
error_weightings[part] = error_weightings[part] + ewb->error_weights[i];
}
for (i = 0; i < pcnt; i++)
{
error_weightings[i] = error_weightings[i] * (1.0f / pi->texels_per_partition[i]);
}
for (i = 0; i < pcnt; i++)
{
color_scalefactors[i].x = sqrt(error_weightings[i].x);
color_scalefactors[i].y = sqrt(error_weightings[i].y);
color_scalefactors[i].z = sqrt(error_weightings[i].z);
color_scalefactors[i].w = sqrt(error_weightings[i].w);
}
}
/*
main function to identify the best partitionings for a block.

The candidate partitionings for 'partition_count' partitions are visited in the order
written to partition_sequence[] by kmeans_compute_partition_ordering(). Search positions at
or beyond 'partition_search_limit' are not scored. Every scored partitioning gets an error
estimate for each of these endpoint models:
 * uncorrelated : per partition, a line through the average color along the dominant direction
 * same-chroma  : per partition, a line through the origin and the average color
 * separate-<c> : one color component <c> is dropped from the line fit and charged with its
                  squared value range instead (used to pick dual-weight-plane candidates)
Each estimate is the line-fit error plus a term modelling weight quantization imprecision.

Outputs (caller-provided arrays):
 * best_partitions_uncorrellated      : candidates_to_return partition indices
 * best_partitions_samechroma         : candidates_to_return partition indices
 * best_partitions_dual_weight_planes : 2 * candidates_to_return entries; each holds a partition
                                        index in the low bits and the separated component
                                        (0 = red, 1 = green, 2 = blue, 3 = alpha) above them
*/
void find_best_partitionings(int partition_search_limit, int xdim, int ydim, int zdim, int partition_count,
const imageblock * pb, const error_weight_block * ewb, int candidates_to_return,
// best partitionings to use if the endpoint colors are assumed to be uncorrelated
int *best_partitions_uncorrellated,
// best partitionings to use if the endpoint colors have the same chroma
int *best_partitions_samechroma,
// best partitionings to use if using dual plane of weights
int *best_partitions_dual_weight_planes)
{
int i, j;
int texels_per_block = xdim * ydim * zdim;
// constant used to estimate quantization error for a given partitioning;
// the optimal value for this constant depends on bitrate.
// These constants have been determined empirically.
// (the initial value of 100 is always overwritten by the if/else chain below)
float weight_imprecision_estim = 100;
if (texels_per_block <= 20)
weight_imprecision_estim = 0.03f;
else if (texels_per_block <= 31)
weight_imprecision_estim = 0.04f;
else if (texels_per_block <= 41)
weight_imprecision_estim = 0.05f;
else
weight_imprecision_estim = 0.055f;
// partition_sequence[i] is the partition-table index that is evaluated at search position i
int partition_sequence[PARTITION_COUNT];
kmeans_compute_partition_ordering(xdim, ydim, zdim, partition_count, pb, partition_sequence);
float weight_imprecision_estim_squared = weight_imprecision_estim * weight_imprecision_estim;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("weight_imprecision_estim = %g\n", weight_imprecision_estim);
#endif
int uses_alpha = imageblock_uses_alpha(xdim, ydim, zdim, pb);
const partition_info *ptab = get_partition_table(xdim, ydim, zdim, partition_count);
// NOTE: all error arrays below are indexed by search position (i), not by partition-table index.
// partitioning errors assuming uncorrelated-chrominance endpoints
float uncorr_errors[PARTITION_COUNT];
// partitioning errors assuming same-chrominance endpoints
float samechroma_errors[PARTITION_COUNT];
// partitioning errors assuming that one of the color channels
// is uncorrelated from all the other ones
// (the four per-component arrays share one buffer so that the final selection loop can
// address them as separate_errors[position + component * PARTITION_COUNT])
float separate_errors[4 * PARTITION_COUNT];
float *separate_red_errors = separate_errors;
float *separate_green_errors = separate_errors + PARTITION_COUNT;
float *separate_blue_errors = separate_errors + 2 * PARTITION_COUNT;
float *separate_alpha_errors = separate_errors + 3 * PARTITION_COUNT;
// highest search position the selection loops at the end of the function will read;
// lowered when the scoring loop stops early at the search limit
int defacto_search_limit = PARTITION_COUNT - 1;
if (uses_alpha)
{
// ---- scoring pass for blocks with varying alpha: 4-component lines, 3-component "separate" lines ----
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("Partition testing with alpha, %d partitions\n\n", partition_count);
#endif
for (i = 0; i < PARTITION_COUNT; i++)
{
int partition = partition_sequence[i];
int bk_partition_count = ptab[partition].partition_count;
// the table entry reports fewer partitions than requested: store the "invalid" sentinel.
// The selection loops only accept errors below 1e30, so 1e35 is never accepted.
if (bk_partition_count < partition_count)
{
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("Partitioning %d-%d: invalid\n", partition_count, partition);
#endif
uncorr_errors[i] = 1e35f;
samechroma_errors[i] = 1e35f;
separate_red_errors[i] = 1e35f;
separate_green_errors[i] = 1e35f;
separate_blue_errors[i] = 1e35f;
separate_alpha_errors[i] = 1e35f;
continue;
}
// the sentinel value for partitions above the search limit must be smaller
// than the sentinel value for invalid partitions
// (scoring stops at the first valid position at or beyond the limit; positions after it
// are left unwritten and defacto_search_limit keeps the selection loops away from them)
if (i >= partition_search_limit)
{
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("Partitioning %d-%d: excluded from testing\n", partition_count, partition);
#endif
defacto_search_limit = i;
uncorr_errors[i] = 1e34f;
samechroma_errors[i] = 1e34f;
separate_red_errors[i] = 1e34f;
separate_green_errors[i] = 1e34f;
separate_blue_errors[i] = 1e34f;
separate_alpha_errors[i] = 1e34f;
break;
}
// compute the weighting to give to each color channel
// in each partition.
float4 error_weightings[4];
float4 color_scalefactors[4];
float4 inverse_color_scalefactors[4];
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, ptab + partition, error_weightings, color_scalefactors);
// reciprocal scale factors; the MAX() guards against dividing by a near-zero scale factor
for (j = 0; j < partition_count; j++)
{
inverse_color_scalefactors[j].x = 1.0f / MAX(color_scalefactors[j].x, 1e-7f);
inverse_color_scalefactors[j].y = 1.0f / MAX(color_scalefactors[j].y, 1e-7f);
inverse_color_scalefactors[j].z = 1.0f / MAX(color_scalefactors[j].z, 1e-7f);
inverse_color_scalefactors[j].w = 1.0f / MAX(color_scalefactors[j].w, 1e-7f);
}
float4 averages[4];
float4 directions_rgba[4];
float3 directions_gba[4];
float3 directions_rba[4];
float3 directions_rga[4];
float3 directions_rgb[4];
compute_averages_and_directions_rgba(ptab + partition, pb, ewb, color_scalefactors, averages, directions_rgba, directions_gba, directions_rba, directions_rga, directions_rgb);
line4 uncorr_lines[4];
line4 samechroma_lines[4];
line3 separate_red_lines[4];
line3 separate_green_lines[4];
line3 separate_blue_lines[4];
line3 separate_alpha_lines[4];
processed_line4 proc_uncorr_lines[4];
processed_line4 proc_samechroma_lines[4];
processed_line3 proc_separate_red_lines[4];
processed_line3 proc_separate_green_lines[4];
processed_line3 proc_separate_blue_lines[4];
processed_line3 proc_separate_alpha_lines[4];
// the *_linelengths arrays are handed to the compute_error_squared_* calls below
// and read afterwards in the weight-imprecision loop
float uncorr_linelengths[4];
float samechroma_linelengths[4];
float separate_red_linelengths[4];
float separate_green_linelengths[4];
float separate_blue_linelengths[4];
float separate_alpha_linelengths[4];
// build the candidate lines of every partition together with their pre-processed forms
// (amod / bs / bis). A direction (or average) of zero length is replaced by the
// normalized all-ones vector so that normalize() is never applied to a zero vector.
for (j = 0; j < partition_count; j++)
{
// uncorrelated: through the average, along the dominant RGBA direction
uncorr_lines[j].a = averages[j];
if (dot(directions_rgba[j], directions_rgba[j]) == 0.0f)
uncorr_lines[j].b = normalize(float4(1, 1, 1, 1));
else
uncorr_lines[j].b = normalize(directions_rgba[j]);
proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j];
proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j]);
proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j]);
// same-chroma: through the origin, along the direction of the average color
samechroma_lines[j].a = float4(0, 0, 0, 0);
if (dot(averages[j], averages[j]) == 0)
samechroma_lines[j].b = normalize(float4(1, 1, 1, 1));
else
samechroma_lines[j].b = normalize(averages[j]);
proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j];
proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j]);
proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j]);
// separate-<c>: 3-component lines over the components that remain when <c> is dropped
separate_red_lines[j].a = averages[j].yzw;
if (dot(directions_gba[j], directions_gba[j]) == 0.0f)
separate_red_lines[j].b = normalize(float3(1, 1, 1));
else
separate_red_lines[j].b = normalize(directions_gba[j]);
separate_green_lines[j].a = averages[j].xzw;
if (dot(directions_rba[j], directions_rba[j]) == 0.0f)
separate_green_lines[j].b = normalize(float3(1, 1, 1));
else
separate_green_lines[j].b = normalize(directions_rba[j]);
separate_blue_lines[j].a = averages[j].xyw;
if (dot(directions_rga[j], directions_rga[j]) == 0.0f)
separate_blue_lines[j].b = normalize(float3(1, 1, 1));
else
separate_blue_lines[j].b = normalize(directions_rga[j]);
separate_alpha_lines[j].a = averages[j].xyz;
if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f)
separate_alpha_lines[j].b = normalize(float3(1, 1, 1));
else
separate_alpha_lines[j].b = normalize(directions_rgb[j]);
proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * inverse_color_scalefactors[j].yzw;
proc_separate_red_lines[j].bs = (separate_red_lines[j].b * color_scalefactors[j].yzw);
proc_separate_red_lines[j].bis = (separate_red_lines[j].b * inverse_color_scalefactors[j].yzw);
proc_separate_green_lines[j].amod =
(separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * inverse_color_scalefactors[j].xzw;
proc_separate_green_lines[j].bs = (separate_green_lines[j].b * color_scalefactors[j].xzw);
proc_separate_green_lines[j].bis = (separate_green_lines[j].b * inverse_color_scalefactors[j].xzw);
proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * inverse_color_scalefactors[j].xyw;
proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * color_scalefactors[j].xyw);
proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * inverse_color_scalefactors[j].xyw);
proc_separate_alpha_lines[j].amod =
(separate_alpha_lines[j].a - separate_alpha_lines[j].b * dot(separate_alpha_lines[j].a, separate_alpha_lines[j].b)) * inverse_color_scalefactors[j].xyz;
proc_separate_alpha_lines[j].bs = (separate_alpha_lines[j].b * color_scalefactors[j].xyz);
proc_separate_alpha_lines[j].bis = (separate_alpha_lines[j].b * inverse_color_scalefactors[j].xyz);
}
// line-fit error of the whole block for each endpoint model
float uncorr_error = compute_error_squared_rgba(ptab + partition,
pb,
ewb,
proc_uncorr_lines,
uncorr_linelengths);
float samechroma_error = compute_error_squared_rgba(ptab + partition,
pb,
ewb,
proc_samechroma_lines,
samechroma_linelengths);
float separate_red_error = compute_error_squared_gba(ptab + partition,
pb,
ewb,
proc_separate_red_lines,
separate_red_linelengths);
float separate_green_error = compute_error_squared_rba(ptab + partition,
pb,
ewb,
proc_separate_green_lines,
separate_green_linelengths);
float separate_blue_error = compute_error_squared_rga(ptab + partition,
pb,
ewb,
proc_separate_blue_lines,
separate_blue_linelengths);
float separate_alpha_error = compute_error_squared_rgb(ptab + partition,
pb,
ewb,
proc_separate_alpha_lines,
separate_alpha_linelengths);
// compute minimum & maximum values of every color component (R, G, B and alpha) in each partition
float red_min[4], red_max[4];
float green_min[4], green_max[4];
float blue_min[4], blue_max[4];
float alpha_min[4], alpha_max[4];
compute_alpha_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, alpha_min, alpha_max);
compute_rgb_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, red_min, red_max, green_min, green_max, blue_min, blue_max);
/*
Compute an estimate of error introduced by weight quantization imprecision.
This error is computed as follows, for each partition
1: compute the principal-axis vector (full length) in error-space
2: convert the principal-axis vector to regular RGB-space
3: scale the vector by a constant that estimates average quantization error
4: for each texel, square the vector, then do a dot-product with the texel's error weight;
sum up the results across all texels.
4(optimized): square the vector once, then do a dot-product with the average texel error,
then multiply by the number of texels.
*/
for (j = 0; j < partition_count; j++)
{
float tpp = (float)(ptab[partition].texels_per_partition[j]);
float4 ics = inverse_color_scalefactors[j];
// average error weight * texel count * squared imprecision constant (steps 3 and 4-optimized)
float4 error_weights = error_weightings[j] * (tpp * weight_imprecision_estim_squared);
float4 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics;
float4 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics;
float3 separate_red_vector = (separate_red_lines[j].b * separate_red_linelengths[j]) * ics.yzw;
float3 separate_green_vector = (separate_green_lines[j].b * separate_green_linelengths[j]) * ics.xzw;
float3 separate_blue_vector = (separate_blue_lines[j].b * separate_blue_linelengths[j]) * ics.xyw;
float3 separate_alpha_vector = (separate_alpha_lines[j].b * separate_alpha_linelengths[j]) * ics.xyz;
// component-wise squares
uncorr_vector = uncorr_vector * uncorr_vector;
samechroma_vector = samechroma_vector * samechroma_vector;
separate_red_vector = separate_red_vector * separate_red_vector;
separate_green_vector = separate_green_vector * separate_green_vector;
separate_blue_vector = separate_blue_vector * separate_blue_vector;
separate_alpha_vector = separate_alpha_vector * separate_alpha_vector;
uncorr_error += dot(uncorr_vector, error_weights);
samechroma_error += dot(samechroma_vector, error_weights);
separate_red_error += dot(separate_red_vector, error_weights.yzw);
separate_green_error += dot(separate_green_vector, error_weights.xzw);
separate_blue_error += dot(separate_blue_vector, error_weights.xyw);
separate_alpha_error += dot(separate_alpha_vector, error_weights.xyz);
// the separated component is charged with its squared value range, weighted the same way
float red_scalar = (red_max[j] - red_min[j]);
float green_scalar = (green_max[j] - green_min[j]);
float blue_scalar = (blue_max[j] - blue_min[j]);
float alpha_scalar = (alpha_max[j] - alpha_min[j]);
red_scalar *= red_scalar;
green_scalar *= green_scalar;
blue_scalar *= blue_scalar;
alpha_scalar *= alpha_scalar;
separate_red_error += red_scalar * error_weights.x;
separate_green_error += green_scalar * error_weights.y;
separate_blue_error += blue_scalar * error_weights.z;
separate_alpha_error += alpha_scalar * error_weights.w;
}
uncorr_errors[i] = uncorr_error;
samechroma_errors[i] = samechroma_error;
separate_red_errors[i] = separate_red_error;
separate_green_errors[i] = separate_green_error;
separate_blue_errors[i] = separate_blue_error;
separate_alpha_errors[i] = separate_alpha_error;
#ifdef DEBUG_PRINT_DIAGNOSTICS
// NOTE(review): this message prints the search position i, whereas the "invalid" and
// "excluded" messages of this branch print the partition-table index.
if (print_diagnostics)
printf("Partitioning %d-%d errors: uncorr=%g, samechroma=%g, sep-alpha=%g\n", partition_count, i, uncorr_error, samechroma_error, separate_alpha_error);
#endif
}
}
else
{
// ---- scoring pass for blocks with constant alpha: 3-component lines, 2-component "separate" lines ----
// separate_alpha_errors[] is not written in this branch; the selection loop reads it only when uses_alpha is set.
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("Partition testing without alpha, %d partitions\n", partition_count);
#endif
for (i = 0; i < PARTITION_COUNT; i++)
{
int partition = partition_sequence[i];
int bk_partition_count = ptab[partition].partition_count;
// table entry reports fewer partitions than requested: store the "invalid" sentinel
if (bk_partition_count < partition_count)
{
#ifdef DEBUG_PRINT_DIAGNOSTICS
// NOTE(review): prints the search position i; the other messages of this branch print the partition-table index.
if (print_diagnostics)
printf("Partitioning %d-%d: invalid\n", partition_count, i);
#endif
uncorr_errors[i] = 1e35f;
samechroma_errors[i] = 1e35f;
separate_red_errors[i] = 1e35f;
separate_green_errors[i] = 1e35f;
separate_blue_errors[i] = 1e35f;
continue;
}
// the sentinel value for valid partitions above the search limit must be smaller
// than the sentinel value for invalid partitions
// (scoring stops here; later positions stay unwritten and are excluded via defacto_search_limit)
if (i >= partition_search_limit)
{
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf(" Partitioning %d-%d: excluded from testing\n", partition_count, partition);
#endif
defacto_search_limit = i;
uncorr_errors[i] = 1e34f;
samechroma_errors[i] = 1e34f;
separate_red_errors[i] = 1e34f;
separate_green_errors[i] = 1e34f;
separate_blue_errors[i] = 1e34f;
break;
}
// compute the weighting to give to each color channel
// in each partition.
float4 error_weightings[4];
float4 color_scalefactors[4];
float4 inverse_color_scalefactors[4];
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, ptab + partition, error_weightings, color_scalefactors);
// reciprocal scale factors; the MAX() guards against dividing by a near-zero scale factor
for (j = 0; j < partition_count; j++)
{
inverse_color_scalefactors[j].x = 1.0f / MAX(color_scalefactors[j].x, 1e-7f);
inverse_color_scalefactors[j].y = 1.0f / MAX(color_scalefactors[j].y, 1e-7f);
inverse_color_scalefactors[j].z = 1.0f / MAX(color_scalefactors[j].z, 1e-7f);
inverse_color_scalefactors[j].w = 1.0f / MAX(color_scalefactors[j].w, 1e-7f);
}
float3 averages[4];
float3 directions_rgb[4];
float2 directions_rg[4];
float2 directions_rb[4];
float2 directions_gb[4];
compute_averages_and_directions_rgb(ptab + partition, pb, ewb, color_scalefactors, averages, directions_rgb, directions_rg, directions_rb, directions_gb);
line3 uncorr_lines[4];
line3 samechroma_lines[4];
line2 separate_red_lines[4];
line2 separate_green_lines[4];
line2 separate_blue_lines[4];
processed_line3 proc_uncorr_lines[4];
processed_line3 proc_samechroma_lines[4];
processed_line2 proc_separate_red_lines[4];
processed_line2 proc_separate_green_lines[4];
processed_line2 proc_separate_blue_lines[4];
// handed to the compute_error_squared_* calls below and read afterwards
float uncorr_linelengths[4];
float samechroma_linelengths[4];
float separate_red_linelengths[4];
float separate_green_linelengths[4];
float separate_blue_linelengths[4];
// build the candidate lines of every partition (same scheme as the alpha branch, one
// component fewer); zero-length directions fall back to the normalized all-ones vector
for (j = 0; j < partition_count; j++)
{
uncorr_lines[j].a = averages[j];
if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f)
uncorr_lines[j].b = normalize(float3(1, 1, 1));
else
uncorr_lines[j].b = normalize(directions_rgb[j]);
samechroma_lines[j].a = float3(0, 0, 0);
if (dot(averages[j], averages[j]) == 0.0f)
samechroma_lines[j].b = normalize(float3(1, 1, 1));
else
samechroma_lines[j].b = normalize(averages[j]);
proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j].xyz;
proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j].xyz);
proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j].xyz);
proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j].xyz;
proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j].xyz);
proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j].xyz);
// separate-<c>: 2-component lines over the components that remain when <c> is dropped
separate_red_lines[j].a = averages[j].yz;
if (dot(directions_gb[j], directions_gb[j]) == 0.0f)
separate_red_lines[j].b = normalize(float2(1, 1));
else
separate_red_lines[j].b = normalize(directions_gb[j]);
separate_green_lines[j].a = averages[j].xz;
if (dot(directions_rb[j], directions_rb[j]) == 0.0f)
separate_green_lines[j].b = normalize(float2(1, 1));
else
separate_green_lines[j].b = normalize(directions_rb[j]);
separate_blue_lines[j].a = averages[j].xy;
if (dot(directions_rg[j], directions_rg[j]) == 0.0f)
separate_blue_lines[j].b = normalize(float2(1, 1));
else
separate_blue_lines[j].b = normalize(directions_rg[j]);
proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * inverse_color_scalefactors[j].yz;
proc_separate_red_lines[j].bs = (separate_red_lines[j].b * color_scalefactors[j].yz);
proc_separate_red_lines[j].bis = (separate_red_lines[j].b * inverse_color_scalefactors[j].yz);
proc_separate_green_lines[j].amod =
(separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * inverse_color_scalefactors[j].xz;
proc_separate_green_lines[j].bs = (separate_green_lines[j].b * color_scalefactors[j].xz);
proc_separate_green_lines[j].bis = (separate_green_lines[j].b * inverse_color_scalefactors[j].xz);
proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * inverse_color_scalefactors[j].xy;
proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * color_scalefactors[j].xy);
proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * inverse_color_scalefactors[j].xy);
}
// line-fit error of the whole block for each endpoint model
float uncorr_error = compute_error_squared_rgb(ptab + partition,
pb,
ewb,
proc_uncorr_lines,
uncorr_linelengths);
float samechroma_error = compute_error_squared_rgb(ptab + partition,
pb,
ewb,
proc_samechroma_lines,
samechroma_linelengths);
float separate_red_error = compute_error_squared_gb(ptab + partition,
pb,
ewb,
proc_separate_red_lines,
separate_red_linelengths);
float separate_green_error = compute_error_squared_rb(ptab + partition,
pb,
ewb,
proc_separate_green_lines,
separate_green_linelengths);
float separate_blue_error = compute_error_squared_rg(ptab + partition,
pb,
ewb,
proc_separate_blue_lines,
separate_blue_linelengths);
// minimum & maximum of each RGB component in each partition
float red_min[4], red_max[4];
float green_min[4], green_max[4];
float blue_min[4], blue_max[4];
compute_rgb_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, red_min, red_max, green_min, green_max, blue_min, blue_max);
/*
compute an estimate of error introduced by weight imprecision.
This error is computed as follows, for each partition
1: compute the principal-axis vector (full length) in error-space
2: convert the principal-axis vector to regular RGB-space
3: scale the vector by a constant that estimates average quantization error.
4: for each texel, square the vector, then do a dot-product with the texel's error weight;
sum up the results across all texels.
4(optimized): square the vector once, then do a dot-product with the average texel error,
then multiply by the number of texels.
*/
for (j = 0; j < partition_count; j++)
{
float tpp = (float)(ptab[partition].texels_per_partition[j]);
float3 ics = inverse_color_scalefactors[j].xyz;
// average error weight * texel count * squared imprecision constant (steps 3 and 4-optimized)
float3 error_weights = error_weightings[j].xyz * (tpp * weight_imprecision_estim_squared);
float3 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics;
float3 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics;
float2 separate_red_vector = (separate_red_lines[j].b * separate_red_linelengths[j]) * ics.yz;
float2 separate_green_vector = (separate_green_lines[j].b * separate_green_linelengths[j]) * ics.xz;
float2 separate_blue_vector = (separate_blue_lines[j].b * separate_blue_linelengths[j]) * ics.xy;
// component-wise squares
uncorr_vector = uncorr_vector * uncorr_vector;
samechroma_vector = samechroma_vector * samechroma_vector;
separate_red_vector = separate_red_vector * separate_red_vector;
separate_green_vector = separate_green_vector * separate_green_vector;
separate_blue_vector = separate_blue_vector * separate_blue_vector;
uncorr_error += dot(uncorr_vector, error_weights);
samechroma_error += dot(samechroma_vector, error_weights);
separate_red_error += dot(separate_red_vector, error_weights.yz);
separate_green_error += dot(separate_green_vector, error_weights.xz);
separate_blue_error += dot(separate_blue_vector, error_weights.xy);
// the separated component is charged with its squared value range, weighted the same way
float red_scalar = (red_max[j] - red_min[j]);
float green_scalar = (green_max[j] - green_min[j]);
float blue_scalar = (blue_max[j] - blue_min[j]);
red_scalar *= red_scalar;
green_scalar *= green_scalar;
blue_scalar *= blue_scalar;
separate_red_error += red_scalar * error_weights.x;
separate_green_error += green_scalar * error_weights.y;
separate_blue_error += blue_scalar * error_weights.z;
}
uncorr_errors[i] = uncorr_error;
samechroma_errors[i] = samechroma_error;
separate_red_errors[i] = separate_red_error;
separate_green_errors[i] = separate_green_error;
separate_blue_errors[i] = separate_blue_error;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("Partitioning %d-%d errors: uncorr=%f, samechroma=%f, sep-red=%f, sep-green=%f, sep-blue=%f\n",
partition_count, partition, uncorr_error, samechroma_error, separate_red_error, separate_green_error, separate_blue_error);
#endif
}
}
// ---- selection of the single-plane candidates ----
// Each round picks the lowest remaining uncorrelated error, then the lowest remaining
// same-chroma error. A picked position is overwritten with 1e30 in BOTH arrays, and only
// errors strictly below 1e30 are accepted, so a position cannot be picked a second time by
// either list. When nothing is below 1e30, search position 0 is used as the fallback.
for (i = 0; i < candidates_to_return; i++)
{
int best_uncorr_partition = 0;
int best_samechroma_partition = 0;
float best_uncorr_error = 1e30f;
float best_samechroma_error = 1e30f;
for (j = 0; j <= defacto_search_limit; j++)
{
if (uncorr_errors[j] < best_uncorr_error)
{
best_uncorr_partition = j;
best_uncorr_error = uncorr_errors[j];
}
}
// translate the search position back into a partition-table index
best_partitions_uncorrellated[i] = partition_sequence[best_uncorr_partition];
uncorr_errors[best_uncorr_partition] = 1e30f;
samechroma_errors[best_uncorr_partition] = 1e30f;
for (j = 0; j <= defacto_search_limit; j++)
{
if (samechroma_errors[j] < best_samechroma_error)
{
best_samechroma_partition = j;
best_samechroma_error = samechroma_errors[j];
}
}
best_partitions_samechroma[i] = partition_sequence[best_samechroma_partition];
samechroma_errors[best_samechroma_partition] = 1e30f;
uncorr_errors[best_samechroma_partition] = 1e30f;
}
// ---- selection of the dual-weight-plane candidates (twice as many are returned) ----
// best_partition is an index into the combined separate_errors buffer,
// i.e. search position + component * PARTITION_COUNT.
for (i = 0; i < 2 * candidates_to_return; i++)
{
int best_partition = 0;
float best_partition_error = 1e30f;
for (j = 0; j <= defacto_search_limit; j++)
{
// NOTE(review): "1 ||" makes this condition always true, so the red/green/blue
// estimates are considered whether or not the block uses alpha.
if (1 || !uses_alpha)
{
if (separate_errors[j] < best_partition_error)
{
best_partition = j;
best_partition_error = separate_errors[j];
}
if (separate_errors[j + PARTITION_COUNT] < best_partition_error)
{
best_partition = j + PARTITION_COUNT;
best_partition_error = separate_errors[j + PARTITION_COUNT];
}
if (separate_errors[j + 2 * PARTITION_COUNT] < best_partition_error)
{
best_partition = j + 2 * PARTITION_COUNT;
best_partition_error = separate_errors[j + 2 * PARTITION_COUNT];
}
}
// the separate-alpha estimates only exist when the alpha scoring branch ran
if (uses_alpha)
{
if (separate_errors[j + 3 * PARTITION_COUNT] < best_partition_error)
{
best_partition = j + 3 * PARTITION_COUNT;
best_partition_error = separate_errors[j + 3 * PARTITION_COUNT];
}
}
}
// retire the picked (position, component) pair
separate_errors[best_partition] = 1e30f;
// keep the component number in the bits above PARTITION_BITS and replace the search position
// in the low bits by the partition-table index.
// presumably PARTITION_COUNT == (1 << PARTITION_BITS) -- TODO confirm against the header
best_partition = ((best_partition >> PARTITION_BITS) << PARTITION_BITS) | partition_sequence[best_partition & (PARTITION_COUNT - 1)];
best_partitions_dual_weight_planes[i] = best_partition;
}
}

File diff suppressed because it is too large Load Diff

324
3rdparty/astc/astc_imageblock.cpp vendored Normal file
View File

@@ -0,0 +1,324 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Functions for managing ASTC codec images.
*/
/*----------------------------------------------------------------------------*/
#include <math.h>
#include "astc_codec_internals.h"
#include "softfloat.h"
#include <stdint.h>
#include <stdio.h>
// conversion functions between the LNS representation and the FP16 representation.
// Converts a linear floating-point value to the encoder's LNS value.
// NaN and values not above 2^-26 map to 0; magnitudes of 65536 or more map to 65535.
// The result is returned as a float, not rounded to an integer.
float float_to_lns(float p)
{
	const float underflow_threshold = 1.0f / 67108864.0f;	// 2^-26

	// NaN and underflow both collapse to zero
	if (astc_isnan(p) || p <= underflow_threshold)
		return 0;

	// overflow saturates
	if (fabs(p) >= 65536.0f)
		return 65535;

	int exponent;
	const float fraction = frexp(p, &exponent);

	float mantissa;
	if (exponent >= -13)
	{
		// regular case: rebias the exponent and scale the fraction above 0.5 by 4096
		mantissa = (fraction - 0.5f) * 4096.0f;
		exponent += 14;
	}
	else
	{
		// input is smaller than 2^-14: scale it by 2^25 and use exponent 0
		mantissa = p * 33554432.0f;
		exponent = 0;
	}

	// piecewise-linear remapping of the mantissa
	if (mantissa < 384.0f)
		mantissa = mantissa * (4.0f / 3.0f);
	else if (mantissa <= 1408.0f)
		mantissa = mantissa + 128.0f;
	else
		mantissa = (mantissa + 512.0f) * (4.0f / 5.0f);

	// each exponent step is worth 2048 LNS units; the final +1 is part of the mapping
	mantissa += exponent * 2048.0f;
	return mantissa + 1.0f;
}
// Converts a 16-bit LNS value to FP16 bits.
// The low 11 bits of the input are remapped piecewise-linearly and reduced to 10 bits,
// the top 5 bits are moved to bit 10 and up, and the result is capped at 0x7BFF.
uint16_t lns_to_sf16(uint16_t p)
{
	const unsigned lns_mantissa = p & 0x7FFu;
	const unsigned lns_exponent = p >> 11;

	// three linear segments, selected by the mantissa range
	unsigned expanded;
	if (lns_mantissa >= 1536u)
		expanded = 5u * lns_mantissa - 2048u;
	else if (lns_mantissa >= 512u)
		expanded = 4u * lns_mantissa - 512u;
	else
		expanded = 3u * lns_mantissa;

	const unsigned packed = (lns_exponent << 10) | (expanded >> 3);
	const unsigned ceiling = 0x7BFFu;
	return static_cast<uint16_t>(packed < ceiling ? packed : ceiling);
}
// Converts a 16-bit UNORM (LDR) value to FP16 bits.
// 0xFFFF maps to 1.0 (0x3C00). Inputs below 4 are returned as p << 8 without normalization:
// LDR interpolation alone would never need this path, but very small UNORM16 values
// can arrive through the constant-block.
uint16_t unorm16_to_sf16(uint16_t p)
{
	if (p == 0xFFFF)
		return 0x3C00;			// value of 1.0 .

	if (p < 4)
		return p << 8;

	// number of leading zero bits within the 16-bit value
	const int leading_zeros = clz32(p) - 16;

	// shift the leading one out of the 16-bit value, then keep the 10 bits that follow it
	const uint16_t without_leading_one = static_cast<uint16_t>(p << (leading_zeros + 1));
	const int mantissa = without_leading_one >> 6;
	const int exponent = 14 - leading_zeros;

	return static_cast<uint16_t>(mantissa | (exponent << 10));
}
void imageblock_initialize_deriv_from_work_and_orig(imageblock * pb, int pixelcount)
{
int i;
const float *fptr = pb->orig_data;
const float *wptr = pb->work_data;
float *dptr = pb->deriv_data;
for (i = 0; i < pixelcount; i++)
{
// compute derivatives for RGB first
if (pb->rgb_lns[i])
{
float r = MAX(fptr[0], 6e-5f);
float g = MAX(fptr[1], 6e-5f);
float b = MAX(fptr[2], 6e-5f);
float rderiv = (float_to_lns(r * 1.05f) - float_to_lns(r)) / (r * 0.05f);
float gderiv = (float_to_lns(g * 1.05f) - float_to_lns(g)) / (g * 0.05f);
float bderiv = (float_to_lns(b * 1.05f) - float_to_lns(b)) / (b * 0.05f);
// the derivative may not actually take values smaller than 1/32 or larger than 2^25;
// if it does, we clamp it.
if (rderiv < (1.0f / 32.0f))
rderiv = (1.0f / 32.0f);
else if (rderiv > 33554432.0f)
rderiv = 33554432.0f;
if (gderiv < (1.0f / 32.0f))
gderiv = (1.0f / 32.0f);
else if (gderiv > 33554432.0f)
gderiv = 33554432.0f;
if (bderiv < (1.0f / 32.0f))
bderiv = (1.0f / 32.0f);
else if (bderiv > 33554432.0f)
bderiv = 33554432.0f;
dptr[0] = rderiv;
dptr[1] = gderiv;
dptr[2] = bderiv;
}
else
{
dptr[0] = 65535.0f;
dptr[1] = 65535.0f;
dptr[2] = 65535.0f;
}
// then compute derivatives for Alpha
if (pb->alpha_lns[i])
{
float a = MAX(fptr[3], 6e-5f);
float aderiv = (float_to_lns(a * 1.05f) - float_to_lns(a)) / (a * 0.05f);
// the derivative may not actually take values smaller than 1/32 or larger than 2^25;
// if it does, we clamp it.
if (aderiv < (1.0f / 32.0f))
aderiv = (1.0f / 32.0f);
else if (aderiv > 33554432.0f)
aderiv = 33554432.0f;
dptr[3] = aderiv;
}
else
{
dptr[3] = 65535.0f;
}
fptr += 4;
wptr += 4;
dptr += 4;
}
}
// Populates work_data from orig_data for the first 'pixelcount' texels.
// Channels flagged as LNS are converted with float_to_lns(); all other
// channels are scaled by 65535. The derivative data is refreshed afterwards.
void imageblock_initialize_work_from_orig(imageblock * pb, int pixelcount)
{
    for (int texel = 0; texel < pixelcount; texel++)
    {
        const float *orig = pb->orig_data + 4 * texel;
        float *work = pb->work_data + 4 * texel;

        // red, green and blue share one LNS flag
        if (pb->rgb_lns[texel])
        {
            for (int c = 0; c < 3; c++)
                work[c] = float_to_lns(orig[c]);
        }
        else
        {
            for (int c = 0; c < 3; c++)
                work[c] = orig[c] * 65535.0f;
        }

        // alpha is flagged independently
        if (pb->alpha_lns[texel])
            work[3] = float_to_lns(orig[3]);
        else
            work[3] = orig[3] * 65535.0f;
    }

    imageblock_initialize_deriv_from_work_and_orig(pb, pixelcount);
}
// Populates orig_data from work_data for the first 'pixelcount' texels.
// Each work value is truncated to 16 bits, converted to an FP16 bit pattern
// (lns_to_sf16() for LNS-flagged channels, unorm16_to_sf16() otherwise) and
// then widened to float. The derivative data is refreshed afterwards.
void imageblock_initialize_orig_from_work(imageblock * pb, int pixelcount)
{
    for (int texel = 0; texel < pixelcount; texel++)
    {
        const float *work = pb->work_data + 4 * texel;
        float *orig = pb->orig_data + 4 * texel;

        // red, green and blue share one LNS flag
        uint16_t (*rgb_to_sf16) (uint16_t) = pb->rgb_lns[texel] ? lns_to_sf16 : unorm16_to_sf16;
        for (int c = 0; c < 3; c++)
            orig[c] = sf16_to_float(rgb_to_sf16((uint16_t) work[c]));

        // alpha is flagged independently
        uint16_t (*alpha_to_sf16) (uint16_t) = pb->alpha_lns[texel] ? lns_to_sf16 : unorm16_to_sf16;
        orig[3] = sf16_to_float(alpha_to_sf16((uint16_t) work[3]));
    }

    imageblock_initialize_deriv_from_work_and_orig(pb, pixelcount);
}
/*
    Recomputes the summary fields of an imageblock: the per-channel minimum
    and maximum, and the grayscale flag (1 when red == green == blue for
    every texel). Values are taken from work_data, not orig_data, over
    xdim * ydim * zdim texels.
*/
void update_imageblock_flags(imageblock * pb, int xdim, int ydim, int zdim)
{
    const int texel_count = xdim * ydim * zdim;

    // channel order: red, green, blue, alpha
    float lowest[4] = { 1e38f, 1e38f, 1e38f, 1e38f };
    float highest[4] = { -1e38f, -1e38f, -1e38f, -1e38f };
    int is_gray = 1;

    for (int texel = 0; texel < texel_count; texel++)
    {
        const float *px = pb->work_data + 4 * texel;

        for (int c = 0; c < 4; c++)
        {
            if (px[c] < lowest[c])
                lowest[c] = px[c];
            if (px[c] > highest[c])
                highest[c] = px[c];
        }

        if (is_gray == 1 && (px[0] != px[1] || px[0] != px[2]))
            is_gray = 0;
    }

    pb->red_min = lowest[0];
    pb->red_max = highest[0];
    pb->green_min = lowest[1];
    pb->green_max = highest[1];
    pb->blue_min = lowest[2];
    pb->blue_max = highest[2];
    pb->alpha_min = lowest[3];
    pb->alpha_max = highest[3];
    pb->grayscale = is_gray;
}

649
3rdparty/astc/astc_integer_sequence.cpp vendored Normal file
View File

@@ -0,0 +1,649 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Functions to encode/decode data using Bounded Integer Sequence
* Encoding.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
// Decode table: unpacked quint triplet <low, middle, high> for each packed
// 7-bit quint-block value. decode_ise() indexes it with the packed value and
// adds element [0] to the first, [1] to the second and [2] to the third
// result of a group of three.
// The table has 128 entries but only 5^3 = 125 distinct triplets exist, so a
// few triplets (e.g. {4, 4, 4}) appear more than once.
static const uint8_t quints_of_integer[128][3] = {
{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
{4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
{0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0},
{4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},
{0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0},
{4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
{0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0},
{4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},
{0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1},
{4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
{0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1},
{4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
{0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1},
{4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},
{0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
{4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},
{0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2},
{4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
{0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2},
{4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
{0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2},
{4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},
{0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2},
{4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
{0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3},
{4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},
{0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3},
{4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},
{0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3},
{4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
{0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3},
{4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4},
};
// Encode table: packed quint-block value for every unpacked quint triplet,
// indexed by [high][middle][low]. encode_ise() looks it up with the third,
// second and first element of each group of three (in that index order).
static const uint8_t integer_of_quints[5][5][5] = {
{
{0, 1, 2, 3, 4,},
{8, 9, 10, 11, 12,},
{16, 17, 18, 19, 20,},
{24, 25, 26, 27, 28,},
{5, 13, 21, 29, 6,},
},
{
{32, 33, 34, 35, 36,},
{40, 41, 42, 43, 44,},
{48, 49, 50, 51, 52,},
{56, 57, 58, 59, 60,},
{37, 45, 53, 61, 14,},
},
{
{64, 65, 66, 67, 68,},
{72, 73, 74, 75, 76,},
{80, 81, 82, 83, 84,},
{88, 89, 90, 91, 92,},
{69, 77, 85, 93, 22,},
},
{
{96, 97, 98, 99, 100,},
{104, 105, 106, 107, 108,},
{112, 113, 114, 115, 116,},
{120, 121, 122, 123, 124,},
{101, 109, 117, 125, 30,},
},
{
{102, 103, 70, 71, 38,},
{110, 111, 78, 79, 46,},
{118, 119, 86, 87, 54,},
{126, 127, 94, 95, 62,},
{39, 47, 55, 63, 31,},
},
};
// Decode table: unpacked trit quintuplet <low,_,_,_,high> for each packed
// 8-bit trit-block value. decode_ise() indexes it with the packed value and
// adds elements [0]..[4] to the five results of a group, in order.
// The table has 256 entries but only 3^5 = 243 distinct quintuplets exist, so
// some quintuplets appear more than once.
static const uint8_t trits_of_integer[256][5] = {
{0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
{0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
{0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
{0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
{0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
{0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
{0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
{0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
{0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
{0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
{0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
{0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
{0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
{0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
{0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
{0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
{0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
{0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
{0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
{0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
{0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
{0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
{0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
{0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
{0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
{0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
{0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
{0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
{0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
{0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
{0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
{0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
{0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
{0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
{0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
{0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
{0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
{0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
{0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
{0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
{0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
{0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
{0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
{0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
{0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
{0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
{0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
{0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
{0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
{0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
{0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
{0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
{0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
{0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
{0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
{0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
{0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
{0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
{0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
{0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
{0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
{0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
};
// Encode table: packed trit-block value for every unpacked trit quintuplet,
// indexed by [high][][][][low]. encode_ise() looks it up with the fifth
// element of each group of five as the first index and the first element as
// the last index.
static const uint8_t integer_of_trits[3][3][3][3][3] = {
{
{
{
{0, 1, 2,},
{4, 5, 6,},
{8, 9, 10,},
},
{
{16, 17, 18,},
{20, 21, 22,},
{24, 25, 26,},
},
{
{3, 7, 15,},
{19, 23, 27,},
{12, 13, 14,},
},
},
{
{
{32, 33, 34,},
{36, 37, 38,},
{40, 41, 42,},
},
{
{48, 49, 50,},
{52, 53, 54,},
{56, 57, 58,},
},
{
{35, 39, 47,},
{51, 55, 59,},
{44, 45, 46,},
},
},
{
{
{64, 65, 66,},
{68, 69, 70,},
{72, 73, 74,},
},
{
{80, 81, 82,},
{84, 85, 86,},
{88, 89, 90,},
},
{
{67, 71, 79,},
{83, 87, 91,},
{76, 77, 78,},
},
},
},
{
{
{
{128, 129, 130,},
{132, 133, 134,},
{136, 137, 138,},
},
{
{144, 145, 146,},
{148, 149, 150,},
{152, 153, 154,},
},
{
{131, 135, 143,},
{147, 151, 155,},
{140, 141, 142,},
},
},
{
{
{160, 161, 162,},
{164, 165, 166,},
{168, 169, 170,},
},
{
{176, 177, 178,},
{180, 181, 182,},
{184, 185, 186,},
},
{
{163, 167, 175,},
{179, 183, 187,},
{172, 173, 174,},
},
},
{
{
{192, 193, 194,},
{196, 197, 198,},
{200, 201, 202,},
},
{
{208, 209, 210,},
{212, 213, 214,},
{216, 217, 218,},
},
{
{195, 199, 207,},
{211, 215, 219,},
{204, 205, 206,},
},
},
},
{
{
{
{96, 97, 98,},
{100, 101, 102,},
{104, 105, 106,},
},
{
{112, 113, 114,},
{116, 117, 118,},
{120, 121, 122,},
},
{
{99, 103, 111,},
{115, 119, 123,},
{108, 109, 110,},
},
},
{
{
{224, 225, 226,},
{228, 229, 230,},
{232, 233, 234,},
},
{
{240, 241, 242,},
{244, 245, 246,},
{248, 249, 250,},
},
{
{227, 231, 239,},
{243, 247, 251,},
{236, 237, 238,},
},
},
{
{
{28, 29, 30,},
{60, 61, 62,},
{92, 93, 94,},
},
{
{156, 157, 158,},
{188, 189, 190,},
{220, 221, 222,},
},
{
{31, 63, 127,},
{159, 191, 255,},
{252, 253, 254,},
},
},
},
};
// Describes how one element of the given quantization level is stored:
// '*bits' receives the number of plain bits per element, and '*trits' /
// '*quints' are set to 1 when the element additionally carries a trit or a
// quint. At most one of '*trits' and '*quints' is ever set. For an
// unrecognized level all three outputs are 0.
void find_number_of_bits_trits_quints(int quantization_level, int *bits, int *trits, int *quints)
{
    *bits = 0;
    *trits = 0;
    *quints = 0;

    // Plain bits per element. QUANT_3 and QUANT_5 carry none and therefore
    // keep the zero written above.
    switch (quantization_level)
    {
    case QUANT_2:
    case QUANT_6:
    case QUANT_10:
        *bits = 1;
        break;
    case QUANT_4:
    case QUANT_12:
    case QUANT_20:
        *bits = 2;
        break;
    case QUANT_8:
    case QUANT_24:
    case QUANT_40:
        *bits = 3;
        break;
    case QUANT_16:
    case QUANT_48:
    case QUANT_80:
        *bits = 4;
        break;
    case QUANT_32:
    case QUANT_96:
    case QUANT_160:
        *bits = 5;
        break;
    case QUANT_64:
    case QUANT_192:
        *bits = 6;
        break;
    case QUANT_128:
        *bits = 7;
        break;
    case QUANT_256:
        *bits = 8;
        break;
    }

    // Trit or quint component, if any.
    switch (quantization_level)
    {
    case QUANT_3:
    case QUANT_6:
    case QUANT_12:
    case QUANT_24:
    case QUANT_48:
    case QUANT_96:
    case QUANT_192:
        *trits = 1;
        break;
    case QUANT_5:
    case QUANT_10:
    case QUANT_20:
    case QUANT_40:
    case QUANT_80:
    case QUANT_160:
        *quints = 1;
        break;
    }
}
// Writes the low 'bitcount' bits of 'value' (up to 8 bits) into the byte
// buffer 'ptr', starting at absolute bit position 'bitoffset' (bit 0 is the
// least significant bit of ptr[0]). Bits outside the written field are
// preserved.
//
// The following byte is only touched when the field actually straddles a
// byte boundary, so a field that ends inside the last byte of a buffer never
// accesses memory past the end of that buffer. A 'bitcount' of zero or less
// writes nothing.
static inline void write_bits(int value, int bitcount, int bitoffset, uint8_t * ptr)
{
    if (bitcount <= 0)
        return;

    // Unsigned arithmetic keeps every shift below well-defined.
    unsigned int field_mask = (1u << bitcount) - 1u;
    unsigned int field = (unsigned int) value & field_mask;

    ptr += bitoffset >> 3;
    const int shift = bitoffset & 7;
    field <<= shift;
    field_mask <<= shift;

    ptr[0] = (uint8_t) ((ptr[0] & ~field_mask) | field);

    // Spill into the next byte only when the field crosses the boundary.
    if (shift + bitcount > 8)
        ptr[1] = (uint8_t) ((ptr[1] & ~(field_mask >> 8)) | (field >> 8));
}
// Reads 'bitcount' bits (up to 8) from the byte buffer 'ptr', starting at
// absolute bit position 'bitoffset' (bit 0 is the least significant bit of
// ptr[0]), and returns them as an unsigned value in the low bits of an int.
//
// The following byte is only read when the field actually straddles a byte
// boundary, so a field that ends inside the last byte of a buffer never reads
// past the end of that buffer. A 'bitcount' of zero or less returns 0.
static inline int read_bits(int bitcount, int bitoffset, const uint8_t * ptr)
{
    if (bitcount <= 0)
        return 0;

    const int mask = (1 << bitcount) - 1;
    ptr += bitoffset >> 3;
    const int shift = bitoffset & 7;

    int value = ptr[0];
    // Pull in the next byte only when the field crosses the boundary.
    if (shift + bitcount > 8)
        value |= ptr[1] << 8;

    return (value >> shift) & mask;
}
void encode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset)
{
int i;
uint8_t lowparts[64];
uint8_t highparts[69]; // 64 elements + 5 elements for padding
uint8_t tq_blocks[22]; // trit-blocks or quint-blocks
int bits, trits, quints;
find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints);
for (i = 0; i < elements; i++)
{
lowparts[i] = input_data[i] & ((1 << bits) - 1);
highparts[i] = input_data[i] >> bits;
}
for (i = elements; i < elements + 5; i++)
highparts[i] = 0; // padding before we start constructing trit-blocks or quint-blocks
// construct trit-blocks or quint-blocks as necessary
if (trits)
{
int trit_blocks = (elements + 4) / 5;
for (i = 0; i < trit_blocks; i++)
tq_blocks[i] = integer_of_trits[highparts[5 * i + 4]][highparts[5 * i + 3]][highparts[5 * i + 2]][highparts[5 * i + 1]][highparts[5 * i]];
}
if (quints)
{
int quint_blocks = (elements + 2) / 3;
for (i = 0; i < quint_blocks; i++)
tq_blocks[i] = integer_of_quints[highparts[3 * i + 2]][highparts[3 * i + 1]][highparts[3 * i]];
}
// then, write out the actual bits.
int lcounter = 0;
int hcounter = 0;
for (i = 0; i < elements; i++)
{
write_bits(lowparts[i], bits, bit_offset, output_data);
bit_offset += bits;
if (trits)
{
static const int bits_to_write[5] = { 2, 2, 1, 2, 1 };
static const int block_shift[5] = { 0, 2, 4, 5, 7 };
static const int next_lcounter[5] = { 1, 2, 3, 4, 0 };
static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 };
write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data);
bit_offset += bits_to_write[lcounter];
hcounter += hcounter_incr[lcounter];
lcounter = next_lcounter[lcounter];
}
if (quints)
{
static const int bits_to_write[3] = { 3, 2, 2 };
static const int block_shift[3] = { 0, 3, 5 };
static const int next_lcounter[3] = { 1, 2, 0 };
static const int hcounter_incr[3] = { 0, 0, 1 };
write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data);
bit_offset += bits_to_write[lcounter];
hcounter += hcounter_incr[lcounter];
lcounter = next_lcounter[lcounter];
}
}
}
// Unpacks 'elements' quantized values from 'input_data', starting at bit
// position 'bit_offset', into 'output_data', using the layout implied by
// 'quantization_level'. This is the inverse of encode_ise().
//
// The low bits of every element are read directly. The interleaved pieces of
// each packed trit-block or quint-block are gathered back together and then
// expanded through the lookup tables into the high part of each element.
//
// The local buffers are sized for at most 64 elements.
void decode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset)
{
    // per-slot layout of a packed trit block (5 elements, 8 bits)
    static const int trit_bits[5] = { 2, 2, 1, 2, 1 };
    static const int trit_shift[5] = { 0, 2, 4, 5, 7 };
    // per-slot layout of a packed quint block (3 elements, 7 bits)
    static const int quint_bits[3] = { 3, 2, 2 };
    static const int quint_shift[3] = { 0, 3, 5 };

    // Block unpacking below always writes whole groups of 5 (or 3) results,
    // so it can touch up to 4 entries beyond 'elements'. The maximum number
    // of real results is 64 elements; 4 more are kept as padding. The array
    // is zero-initialized so that the padding entries are never read while
    // uninitialized.
    uint8_t results[68] = { 0 };
    // Packed trit-blocks or quint-blocks; these are assembled with OR and so
    // must start out as zero.
    uint8_t tq_blocks[22] = { 0 };

    int bits, trits, quints;
    find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints);

    // Collect the low bits of each element plus its share of the packed
    // block. A level has either a trit or a quint component, never both, so
    // the block and slot follow directly from the element index.
    for (int e = 0; e < elements; e++)
    {
        results[e] = read_bits(bits, bit_offset, input_data);
        bit_offset += bits;

        if (trits)
        {
            const int slot = e % 5;
            const int piece = read_bits(trit_bits[slot], bit_offset, input_data);
            bit_offset += trit_bits[slot];
            tq_blocks[e / 5] |= piece << trit_shift[slot];
        }
        if (quints)
        {
            const int slot = e % 3;
            const int piece = read_bits(quint_bits[slot], bit_offset, input_data);
            bit_offset += quint_bits[slot];
            tq_blocks[e / 3] |= piece << quint_shift[slot];
        }
    }

    // Expand the packed blocks into the high part of each element.
    if (trits)
    {
        const int block_count = (elements + 4) / 5;
        for (int b = 0; b < block_count; b++)
        {
            const uint8_t *unpacked = trits_of_integer[tq_blocks[b]];
            for (int k = 0; k < 5; k++)
                results[5 * b + k] |= unpacked[k] << bits;
        }
    }
    if (quints)
    {
        const int block_count = (elements + 2) / 3;
        for (int b = 0; b < block_count; b++)
        {
            const uint8_t *unpacked = quints_of_integer[tq_blocks[b]];
            for (int k = 0; k < 3; k++)
                results[3 * b + k] |= unpacked[k] << bits;
        }
    }

    for (int e = 0; e < elements; e++)
        output_data[e] = results[e];
}
// Returns the number of bits needed to store 'items' values at quantization
// level 'quant'. Values are stored in groups: 'group_size' values share
// 'group_bits' bits, and a partial final group is rounded up.
// An unrecognized level returns 100000.
int compute_ise_bitcount(int items, quantization_method quant)
{
    int group_bits;
    int group_size;

    switch (quant)
    {
    // plain-bit levels: every value stands alone
    case QUANT_2:   group_bits = 1;  group_size = 1; break;
    case QUANT_4:   group_bits = 2;  group_size = 1; break;
    case QUANT_8:   group_bits = 3;  group_size = 1; break;
    case QUANT_16:  group_bits = 4;  group_size = 1; break;
    case QUANT_32:  group_bits = 5;  group_size = 1; break;
    case QUANT_64:  group_bits = 6;  group_size = 1; break;
    case QUANT_128: group_bits = 7;  group_size = 1; break;
    case QUANT_256: group_bits = 8;  group_size = 1; break;

    // trit levels: five values per group
    case QUANT_3:   group_bits = 8;  group_size = 5; break;
    case QUANT_6:   group_bits = 13; group_size = 5; break;
    case QUANT_12:  group_bits = 18; group_size = 5; break;
    case QUANT_24:  group_bits = 23; group_size = 5; break;
    case QUANT_48:  group_bits = 28; group_size = 5; break;
    case QUANT_96:  group_bits = 33; group_size = 5; break;
    case QUANT_192: group_bits = 38; group_size = 5; break;

    // quint levels: three values per group
    case QUANT_5:   group_bits = 7;  group_size = 3; break;
    case QUANT_10:  group_bits = 10; group_size = 3; break;
    case QUANT_20:  group_bits = 13; group_size = 3; break;
    case QUANT_40:  group_bits = 16; group_size = 3; break;
    case QUANT_80:  group_bits = 19; group_size = 3; break;
    case QUANT_160: group_bits = 22; group_size = 3; break;

    default:
        return 100000;
    }

    return (group_bits * items + group_size - 1) / group_size;
}

View File

@@ -0,0 +1,520 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief approximate k-means cluster partitioning. Do this in 2 stages
*
* 1: basic clustering, a couple of passes just to get a few clusters
* 2: clustering based on line, a few passes until it seems to
* stabilize.
*
* After clustering is done, we use the clustering result to construct
* one bitmap for each partition. We then scan through the partition table,
* counting how well the bitmaps matched.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
// for k++ means, we need pseudo-random numbers, however using random numbers directly
// results in irreproducible encoding results. As such, we will instead
// just supply a handful of numbers from random.org, and apply an algorithm similar
// to XKCD #221. (http://xkcd.com/221/)
// cluster the texels using the k++ means clustering initialization algorithm.
void kpp_initialize(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, float4 * cluster_centers)
{
int i;
int texels_per_block = xdim * ydim * zdim;
int cluster_center_samples[4];
// pick a random sample as first center-point.
cluster_center_samples[0] = 145897 /* number from random.org */ % texels_per_block;
int samples_selected = 1;
float distances[MAX_TEXELS_PER_BLOCK];
// compute the distance to the first point.
int sample = cluster_center_samples[0];
float4 center_color = float4(blk->work_data[4 * sample],
blk->work_data[4 * sample + 1],
blk->work_data[4 * sample + 2],
blk->work_data[4 * sample + 3]);
float distance_sum = 0.0f;
for (i = 0; i < texels_per_block; i++)
{
float4 color = float4(blk->work_data[4 * i],
blk->work_data[4 * i + 1],
blk->work_data[4 * i + 2],
blk->work_data[4 * i + 3]);
float4 diff = color - center_color;
float distance = dot(diff, diff);
distance_sum += distance;
distances[i] = distance;
}
// more numbers from random.org
float cluster_cutoffs[25] = {
0.952312f, 0.206893f, 0.835984f, 0.507813f, 0.466170f,
0.872331f, 0.488028f, 0.866394f, 0.363093f, 0.467905f,
0.812967f, 0.626220f, 0.932770f, 0.275454f, 0.832020f,
0.362217f, 0.318558f, 0.240113f, 0.009190f, 0.983995f,
0.566812f, 0.347661f, 0.731960f, 0.156391f, 0.297786f
};
while (1)
{
// pick a point in a weighted-random fashion.
float summa = 0.0f;
float distance_cutoff = distance_sum * cluster_cutoffs[samples_selected + 5 * partition_count];
for (i = 0; i < texels_per_block; i++)
{
summa += distances[i];
if (summa >= distance_cutoff)
break;
}
sample = i;
if (sample >= texels_per_block)
sample = texels_per_block - 1;
cluster_center_samples[samples_selected] = sample;
samples_selected++;
if (samples_selected >= partition_count)
break;
// update the distances with the new point.
center_color = float4(blk->work_data[4 * sample], blk->work_data[4 * sample + 1], blk->work_data[4 * sample + 2], blk->work_data[4 * sample + 3]);
distance_sum = 0.0f;
for (i = 0; i < texels_per_block; i++)
{
float4 color = float4(blk->work_data[4 * i],
blk->work_data[4 * i + 1],
blk->work_data[4 * i + 2],
blk->work_data[4 * i + 3]);
float4 diff = color - center_color;
float distance = dot(diff, diff);
distance = MIN(distance, distances[i]);
distance_sum += distance;
distances[i] = distance;
}
}
// finally, gather up the results.
for (i = 0; i < partition_count; i++)
{
int sample = cluster_center_samples[i];
float4 color = float4(blk->work_data[4 * sample],
blk->work_data[4 * sample + 1],
blk->work_data[4 * sample + 2],
blk->work_data[4 * sample + 3]);
cluster_centers[i] = color;
}
}
// basic K-means clustering: given a set of cluster centers,
// assign each texel to a partition
void basic_kmeans_assign_pass(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, const float4 * cluster_centers, int *partition_of_texel)
{
int i, j;
int texels_per_block = xdim * ydim * zdim;
float distances[MAX_TEXELS_PER_BLOCK];
float4 center_color = cluster_centers[0];
int texels_per_partition[4];
texels_per_partition[0] = texels_per_block;
for (i = 1; i < partition_count; i++)
texels_per_partition[i] = 0;
for (i = 0; i < texels_per_block; i++)
{
float4 color = float4(blk->work_data[4 * i],
blk->work_data[4 * i + 1],
blk->work_data[4 * i + 2],
blk->work_data[4 * i + 3]);
float4 diff = color - center_color;
float distance = dot(diff, diff);
distances[i] = distance;
partition_of_texel[i] = 0;
}
for (j = 1; j < partition_count; j++)
{
float4 center_color = cluster_centers[j];
for (i = 0; i < texels_per_block; i++)
{
float4 color = float4(blk->work_data[4 * i],
blk->work_data[4 * i + 1],
blk->work_data[4 * i + 2],
blk->work_data[4 * i + 3]);
float4 diff = color - center_color;
float distance = dot(diff, diff);
if (distance < distances[i])
{
distances[i] = distance;
texels_per_partition[partition_of_texel[i]]--;
texels_per_partition[j]++;
partition_of_texel[i] = j;
}
}
}
// it is possible to get a situation where one of the partitions ends up
// without any texels. In this case, we assign texel N to partition N;
// this is silly, but ensures that every partition retains at least one texel.
// Reassigning a texel in this manner may cause another partition to go empty,
// so if we actually did a reassignment, we run the whole loop over again.
int problem_case;
do
{
problem_case = 0;
for (i = 0; i < partition_count; i++)
{
if (texels_per_partition[i] == 0)
{
texels_per_partition[partition_of_texel[i]]--;
texels_per_partition[i]++;
partition_of_texel[i] = i;
problem_case = 1;
}
}
}
while (problem_case != 0);
}
// basic k-means clustering: given a set of cluster assignments
// for the texels, find the center position of each cluster.
void basic_kmeans_update(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, const int *partition_of_texel, float4 * cluster_centers)
{
int i;
int texels_per_block = xdim * ydim * zdim;
float4 color_sum[4];
int weight_sum[4];
for (i = 0; i < partition_count; i++)
{
color_sum[i] = float4(0, 0, 0, 0);
weight_sum[i] = 0;
}
// first, find the center-of-gravity in each cluster
for (i = 0; i < texels_per_block; i++)
{
float4 color = float4(blk->work_data[4 * i],
blk->work_data[4 * i + 1],
blk->work_data[4 * i + 2],
blk->work_data[4 * i + 3]);
int part = partition_of_texel[i];
color_sum[part] = color_sum[part] + color;
weight_sum[part]++;
}
for (i = 0; i < partition_count; i++)
{
cluster_centers[i] = color_sum[i] * (1.0f / weight_sum[i]);
}
}
// after a few rounds of k-means-clustering, we should have a set of 2, 3 or 4 partitions;
// we then turn this set into 2, 3 or 4 bitmaps. Then, for each of the 1024 partitions,
// we try to match the bitmaps as well as possible.
// Returns the number of set bits in a 64-bit value.
// Two variants of the same parallel bit-summing technique are used, selected
// by pointer size: one that works on the full 64-bit word, and one that
// splits the input into two 32-bit halves for 32-bit targets.
static inline int bitcount(uint64_t p)
{
    if (sizeof(void *) <= 4)
    {
        // 32-bit target: count each half separately, then combine the
        // per-byte counts before the final horizontal sum.
        const uint32_t pairs = 0x55555555U;
        const uint32_t nibbles = 0x33333333U;
        const uint32_t bytes = 0x0F0F0F0FU;

        uint32_t lo = (uint32_t) p;
        uint32_t hi = (uint32_t) (p >> 32);

        lo -= (lo >> 1) & pairs;
        hi -= (hi >> 1) & pairs;
        lo = (lo & nibbles) + ((lo >> 2) & nibbles);
        hi = (hi & nibbles) + ((hi >> 2) & nibbles);
        lo = (lo + (lo >> 4)) & bytes;
        hi = (hi + (hi >> 4)) & bytes;

        const uint32_t per_byte = lo + hi;
        return (int) ((per_byte * 0x01010101U) >> 24);
    }

    // 64-bit target: same reduction on the whole word. This could be adapted
    // for processors with a POPCNT instruction, but that is left for later.
    const uint64_t pairs = 0x5555555555555555ULL;
    const uint64_t nibbles = 0x3333333333333333ULL;
    const uint64_t bytes = 0x0F0F0F0F0F0F0F0FULL;

    p -= (p >> 1) & pairs;
    p = (p & nibbles) + ((p >> 2) & nibbles);
    p = (p + (p >> 4)) & bytes;
    return (int) ((p * 0x0101010101010101ULL) >> 56);
}
// Bit-mismatch between two 2-partition bitmap sets: the smaller of the two
// possible ways of pairing the 'a' bitmaps with the 'b' bitmaps, where each
// pairing costs the number of differing bits.
static inline int partition_mismatch2(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    const int straight = bitcount(a0 ^ b0) + bitcount(a1 ^ b1);
    const int crossed = bitcount(a0 ^ b1) + bitcount(a1 ^ b0);
    return (straight < crossed) ? straight : crossed;
}
// Bit-mismatch between two 3-partition bitmap sets: the cheapest of the six
// ways of pairing the 'a' bitmaps with the 'b' bitmaps, where pairing a[r]
// with b[c] costs the number of bits in which they differ.
static inline int partition_mismatch3(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t b0, uint64_t b1, uint64_t b2)
{
    const uint64_t a[3] = { a0, a1, a2 };
    const uint64_t b[3] = { b0, b1, b2 };

    // cost[r][c]: mismatch when a[r] is paired with b[c]
    int cost[3][3];
    for (int r = 0; r < 3; r++)
        for (int c = 0; c < 3; c++)
            cost[r][c] = bitcount(a[r] ^ b[c]);

    // Fix the partner of a0, then take the better of the two remaining
    // pairings for a1 and a2.
    int best = cost[0][0] + MIN(cost[1][1] + cost[2][2], cost[1][2] + cost[2][1]);
    const int with_b1 = cost[0][1] + MIN(cost[1][0] + cost[2][2], cost[1][2] + cost[2][0]);
    const int with_b2 = cost[0][2] + MIN(cost[1][0] + cost[2][1], cost[1][1] + cost[2][0]);

    if (with_b1 < best)
        best = with_b1;
    if (with_b2 < best)
        best = with_b2;
    return best;
}
// Returns the smallest of three integers.
static inline int MIN3(int a, int b, int c)
{
    const int smaller_ab = (a < b) ? a : b;
    return (c < smaller_ab) ? c : smaller_ab;
}
// Bit-mismatch between two 4-partition bitmap sets: the cheapest of the 24
// ways of pairing the 'a' bitmaps with the 'b' bitmaps, where pairing a[r]
// with b[c] costs the number of bits in which they differ.
// Cost: 16 bitcount, 17 MIN, 28 ADD.
static inline int partition_mismatch4(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
{
    const uint64_t a[4] = { a0, a1, a2, a3 };
    const uint64_t b[4] = { b0, b1, b2, b3 };

    // cost[r][c]: mismatch when a[r] is paired with b[c]
    int cost[4][4];
    for (int r = 0; r < 4; r++)
        for (int c = 0; c < 4; c++)
            cost[r][c] = bitcount(a[r] ^ b[c]);

    // tail_XY: best way to pair a2 and a3 with the two columns bX and bY
    const int tail_23 = MIN(cost[2][2] + cost[3][3], cost[2][3] + cost[3][2]);
    const int tail_13 = MIN(cost[2][1] + cost[3][3], cost[2][3] + cost[3][1]);
    const int tail_12 = MIN(cost[2][1] + cost[3][2], cost[2][2] + cost[3][1]);
    const int tail_03 = MIN(cost[2][0] + cost[3][3], cost[2][3] + cost[3][0]);
    const int tail_02 = MIN(cost[2][0] + cost[3][2], cost[2][2] + cost[3][0]);
    const int tail_01 = MIN(cost[2][1] + cost[3][0], cost[2][0] + cost[3][1]);

    // Fix the partner of a0, try each remaining partner for a1, and complete
    // with the best tail over the two columns left.
    const int with_b0 = cost[0][0] + MIN3(cost[1][1] + tail_23, cost[1][2] + tail_13, cost[1][3] + tail_12);
    const int with_b1 = cost[0][1] + MIN3(cost[1][0] + tail_23, cost[1][2] + tail_03, cost[1][3] + tail_02);
    const int with_b2 = cost[0][2] + MIN3(cost[1][1] + tail_03, cost[1][0] + tail_13, cost[1][3] + tail_01);
    const int with_b3 = cost[0][3] + MIN3(cost[1][1] + tail_02, cost[1][2] + tail_01, cost[1][0] + tail_12);

    const int best_01 = MIN(with_b0, with_b1);
    const int best_23 = MIN(with_b2, with_b3);
    return MIN(best_01, best_23);
}
void count_partition_mismatch_bits(int xdim, int ydim, int zdim, int partition_count, const uint64_t bitmaps[4], int bitcounts[PARTITION_COUNT])
{
int i;
const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
if (partition_count == 2)
{
uint64_t bm0 = bitmaps[0];
uint64_t bm1 = bitmaps[1];
for (i = 0; i < PARTITION_COUNT; i++)
{
if (pi->partition_count == 2)
{
bitcounts[i] = partition_mismatch2(bm0, bm1, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1]);
}
else
bitcounts[i] = 255;
pi++;
}
}
else if (partition_count == 3)
{
uint64_t bm0 = bitmaps[0];
uint64_t bm1 = bitmaps[1];
uint64_t bm2 = bitmaps[2];
for (i = 0; i < PARTITION_COUNT; i++)
{
if (pi->partition_count == 3)
{
bitcounts[i] = partition_mismatch3(bm0, bm1, bm2, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2]);
}
else
bitcounts[i] = 255;
pi++;
}
}
else if (partition_count == 4)
{
uint64_t bm0 = bitmaps[0];
uint64_t bm1 = bitmaps[1];
uint64_t bm2 = bitmaps[2];
uint64_t bm3 = bitmaps[3];
for (i = 0; i < PARTITION_COUNT; i++)
{
if (pi->partition_count == 4)
{
bitcounts[i] = partition_mismatch4(bm0, bm1, bm2, bm3, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2], pi->coverage_bitmaps[3]);
}
else
bitcounts[i] = 255;
pi++;
}
}
}
// Counting sort of the partition indices by their mismatch score.
//
// partition_ordering[] receives the indices 0..PARTITION_COUNT-1 arranged by
// ascending mismatch_bits[] value; indices with equal scores keep their
// original relative order. Every score is used directly as a bucket index,
// so it must lie in 0..255.
void get_partition_ordering_by_mismatch_bits(const int mismatch_bits[PARTITION_COUNT], int partition_ordering[PARTITION_COUNT])
{
    // pass 1: histogram of the scores
    int next_slot[256] = { 0 };
    for (int p = 0; p < PARTITION_COUNT; p++)
        next_slot[mismatch_bits[p]] += 1;

    // pass 2: turn the histogram into the first output position of each bucket
    int running_total = 0;
    for (int bucket = 0; bucket < 256; bucket++)
    {
        const int population = next_slot[bucket];
        next_slot[bucket] = running_total;
        running_total += population;
    }

    // pass 3: drop every partition index into the next free slot of its bucket
    for (int p = 0; p < PARTITION_COUNT; p++)
    {
        int &slot = next_slot[mismatch_bits[p]];
        partition_ordering[slot] = p;
        slot += 1;
    }
}
/*
    Produce an ordering of the partition-table entries for one image block.

    The block's texels are clustered into partition_count groups (one
    kpp_initialize seeding followed by three assignment passes, with a
    basic_kmeans_update before the second and third). The clustering is then
    turned into per-partition bitmaps over the texels listed in
    bsd->texels_for_bitmap_partitioning, every table entry is scored with
    count_partition_mismatch_bits, and the entries are sorted by that score
    into ordering[].
*/
void kmeans_compute_partition_ordering(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, int *ordering)
{
    const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);

    float4 cluster_centers[4];
    int partition_of_texel[MAX_TEXELS_PER_BLOCK];

    // pass 0: seed the centres, then assign texels
    kpp_initialize(xdim, ydim, zdim, partition_count, blk, cluster_centers);
    basic_kmeans_assign_pass(xdim, ydim, zdim, partition_count, blk, cluster_centers, partition_of_texel);

    // passes 1 and 2: recompute the centres, then reassign texels
    for (int pass = 1; pass < 3; pass++)
    {
        basic_kmeans_update(xdim, ydim, zdim, partition_count, blk, partition_of_texel, cluster_centers);
        basic_kmeans_assign_pass(xdim, ydim, zdim, partition_count, blk, cluster_centers, partition_of_texel);
    }

    // one bitmap per partition; bit k stands for the k-th texel of
    // bsd->texels_for_bitmap_partitioning
    uint64_t bitmaps[4] = { 0ULL, 0ULL, 0ULL, 0ULL };
    const int bitmap_texels = bsd->texelcount_for_bitmap_partitioning;
    for (int bit = 0; bit < bitmap_texels; bit++)
    {
        const int texel = bsd->texels_for_bitmap_partitioning[bit];
        bitmaps[partition_of_texel[texel]] |= 1ULL << bit;
    }

    // score each table entry, then sort the entries by score
    int bitcounts[PARTITION_COUNT];
    count_partition_mismatch_bits(xdim, ydim, zdim, partition_count, bitmaps, bitcounts);
    get_partition_ordering_by_mismatch_bits(bitcounts, ordering);
}

681
3rdparty/astc/astc_lib.cpp vendored Normal file
View File

@@ -0,0 +1,681 @@
/*----------------------------------------------------------------------------*/
/**
* @author Andrew Willmott
*
* @brief Library api for astc codec, to be used as an alternative to astc_toplevel.cpp
*/
/*----------------------------------------------------------------------------*/
#include "astc_lib.h"
#include "astc_codec_internals.h"
#include <math.h>
#include <stdio.h>
// Globals declared in astc_codec_internals.h
// This file supplies their definitions. Within this file only
// rgb_force_use_of_hdr is read (by setup_ewp); the others are presumably
// consumed by the codec sources -- confirm against astc_codec_internals.h.
int perform_srgb_transform = 0;
int alpha_force_use_of_hdr = 0;
int rgb_force_use_of_hdr = 0;
int print_tile_errors = 0;
// The diagnostics globals only exist in builds that define DEBUG_PRINT_DIAGNOSTICS.
#ifdef DEBUG_PRINT_DIAGNOSTICS
int print_diagnostics = 0;
int diagnostics_tile = -1;
#endif
// Error hook that the ASTC code expects its host to define.
// Writes the reporting source location to stderr and then returns to the
// caller; it does not terminate the process.
void astc_codec_internal_error(const char* filename, int line)
{
    FILE* const sink = stderr;
    fprintf(sink, "ASTC encode error @ %s:%d\n", filename, line);
}
// @todo add HDR variants
namespace
{
// Set once the codec lookup tables have been built.
static bool s_tables_initialised = false;

// Build the codec's angular and quantization-mode tables on first use.
// Later calls return immediately. The flag is a plain bool with no locking.
inline void init_tables()
{
    if (s_tables_initialised)
        return;

    prepare_angular_tables();
    build_quantization_mode_table();
    s_tables_initialised = true;
}
// Channel orderings, indexed by ASTC_CHANNELS.
// Each entry gives, for r, g, b and a in turn, which byte of a 4-byte pixel
// holds that channel (to_imageblock reads through these selectors and
// from_imageblock writes through them).
const swizzlepattern k_swizzles[] =
{
{ 0, 1, 2, 3 }, // ASTC_RGBA
{ 2, 1, 0, 3 }, // ASTC_BGRA
};
// Allocate every scratch buffer referenced by *temp_buffers.
// plane1 and planes2 end up pointing at the SAME
// compress_fixed_partition_buffers object, so it must be released only once
// (see free_temp_buffers).
void alloc_temp_buffers(compress_symbolic_block_buffers* temp_buffers)
{
    temp_buffers->ewb = new error_weight_block;
    temp_buffers->ewbo = new error_weight_block_orig;
    temp_buffers->tempblocks = new symbolic_compressed_block[4];
    temp_buffers->temp = new imageblock;

    compress_fixed_partition_buffers* fixed = new compress_fixed_partition_buffers;
    temp_buffers->planes2 = fixed;

    fixed->ei1 = new endpoints_and_weights;
    fixed->ei2 = new endpoints_and_weights;
    fixed->eix1 = new endpoints_and_weights[MAX_DECIMATION_MODES];
    fixed->eix2 = new endpoints_and_weights[MAX_DECIMATION_MODES];

    // weight arrays are sized for two planes
    fixed->decimated_quantized_weights = new float[2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK];
    fixed->decimated_weights = new float[2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK];
    fixed->flt_quantized_decimated_quantized_weights = new float[2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK];
    fixed->u8_quantized_decimated_quantized_weights = new uint8_t[2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK];

    // single-plane and dual-plane paths share one set of buffers
    temp_buffers->plane1 = fixed;
}
// Release everything alloc_temp_buffers created.
// Only planes2 is deleted: plane1 aliases the same object and must not be
// freed a second time. The pointers inside *temp_buffers are left dangling.
void free_temp_buffers(compress_symbolic_block_buffers* temp_buffers)
{
    compress_fixed_partition_buffers* fixed = temp_buffers->planes2;

    // arrays owned by the shared fixed-partition buffers
    delete[] fixed->decimated_quantized_weights;
    delete[] fixed->decimated_weights;
    delete[] fixed->flt_quantized_decimated_quantized_weights;
    delete[] fixed->u8_quantized_decimated_quantized_weights;
    delete[] fixed->eix1;
    delete[] fixed->eix2;

    // single objects owned by the shared fixed-partition buffers
    delete fixed->ei1;
    delete fixed->ei2;
    delete fixed;

    // top-level scratch objects
    delete[] temp_buffers->tempblocks;
    delete temp_buffers->temp;
    delete temp_buffers->ewbo;
    delete temp_buffers->ewb;
}
// More direct version of the astc_codec_image routine, which operates on a
// more conventional 2D image layout. Doesn't support padding, so
// mean_stdev_radius and alpha_radius etc. must be zero.
//
// Loads the xdim x ydim block whose top-left texel is (xpos, ypos) from an
// 8-bit, 4-bytes-per-pixel image (src_stride bytes per row) into pb.
// Texels beyond the right or bottom image edge repeat the last column/row.
// Channels are picked through swz; when srgb is set the r, g and b values
// are converted from sRGB to linear before the block's work data and flags
// are initialised.
void to_imageblock
(
    imageblock* pb,
    const uint8_t* src_data,
    int src_stride,
    int xpos,
    int ypos,
    int xsize,
    int ysize,
    int xdim,
    int ydim,
    swizzlepattern swz,
    bool srgb
)
{
    pb->xpos = xpos;
    pb->ypos = ypos;
    pb->zpos = 0;

    // Selector table for the swizzle: 0..3 are the source bytes scaled to
    // [0,1], 4 is constant zero and 5 is constant one.
    float channels[6];
    channels[4] = 0;
    channels[5] = 1;

    float* texel = pb->orig_data;
    for (int row = 0; row < ydim; row++)
    {
        int yi = ypos + row;
        if (yi >= ysize)
            yi = ysize - 1;

        for (int col = 0; col < xdim; col++)
        {
            int xi = xpos + col;
            if (xi >= xsize)
                xi = xsize - 1;

            const int offset = src_stride * yi + 4 * xi;
            for (int c = 0; c < 4; c++)
                channels[c] = src_data[offset + c] / 255.0f;

            texel[0] = channels[swz.r];
            texel[1] = channels[swz.g];
            texel[2] = channels[swz.b];
            texel[3] = channels[swz.a];
            texel += 4;
        }
    }

    const int pixelcount = xdim * ydim;

    // perform sRGB-to-linear transform on input data, if requested.
    if (srgb)
    {
        // values above 1 are passed through unchanged
        const auto srgb_to_linear = [](float v) -> float
        {
            if (v <= 0.04045f)
                v = v * (1.0f / 12.92f);
            else if (v <= 1)
                v = pow((v + 0.055f) * (1.0f / 1.055f), 2.4f);
            return v;
        };

        texel = pb->orig_data;
        for (int i = 0; i < pixelcount; i++, texel += 4)
        {
            // alpha (texel[3]) is left as-is
            for (int c = 0; c < 3; c++)
                texel[c] = srgb_to_linear(texel[c]);
        }
    }

    // every texel is plain LDR data: no LNS encoding, no NaNs
    for (int i = 0; i < pixelcount; i++)
    {
        pb->rgb_lns [i] = 0;
        pb->alpha_lns[i] = 0;
        pb->nan_texel[i] = 0;
    }

    imageblock_initialize_work_from_orig(pb, pixelcount);
    update_imageblock_flags(pb, xdim, ydim, 1);
}
// Compress an 8-bit, 4-bytes-per-pixel image of xsize x ysize texels into
// ASTC blocks of xdim x ydim texels.
//
// Blocks are written to dst in row-major block order, 16 bytes each, so dst
// must hold ceil(xsize/xdim) * ceil(ysize/ydim) * 16 bytes. Source texels
// are read through src_swz; sRGB-to-linear conversion of the input is
// enabled when decode_mode is DECODE_LDR_SRGB.
void encode_astc
(
    const uint8_t* src,
    int src_stride,
    swizzlepattern src_swz,
    int xsize,
    int ysize,
    int xdim,
    int ydim,
    const error_weighting_params* ewp,
    astc_decode_mode decode_mode,
    uint8_t* dst
)
{
    const int xblocks = (xsize + xdim - 1) / xdim;
    const int yblocks = (ysize + ydim - 1) / ydim;

    // Results are discarded; both getters are called here before any block
    // is compressed.
    get_block_size_descriptor(xdim, ydim, 1);
    get_partition_table(xdim, ydim, 1, 0);

    imageblock pb;
    compress_symbolic_block_buffers temp_buffers;
    alloc_temp_buffers(&temp_buffers);

    astc_codec_image image_info = { nullptr, nullptr, xsize, ysize, 1, 0 };
    const bool input_is_srgb = (decode_mode == DECODE_LDR_SRGB);

    uint8_t* out = dst;
    for (int by = 0; by < yblocks; by++)
    {
        for (int bx = 0; bx < xblocks; bx++)
        {
            to_imageblock(&pb, src, src_stride, bx * xdim, by * ydim, xsize, ysize, xdim, ydim, src_swz, input_is_srgb);

            symbolic_compressed_block scb;
            compress_symbolic_block(&image_info, decode_mode, xdim, ydim, 1, ewp, &pb, &scb, &temp_buffers);

            // blocks are emitted back to back, 16 bytes apiece
            *(physical_compressed_block*) out = symbolic_to_physical(xdim, ydim, 1, &scb);
            out += 16;
        }
    }

    free_temp_buffers(&temp_buffers);
}
// Reset the error-weighting parameters to neutral values: unit powers and
// base weights, unit per-channel weights, and every mean/stdev, radius,
// alpha-scaling, artifact-suppression and normal-map term switched off.
// The search-effort fields are not touched here; setup_ewp fills those.
void init_ewp(error_weighting_params& ewp)
{
    // per-channel weights: r, g, b, a all count equally
    for (int channel = 0; channel < 4; channel++)
        ewp.rgba_weights[channel] = 1.0f;

    // powers and base weights
    ewp.rgb_power = 1.0f;
    ewp.rgb_base_weight = 1.0f;
    ewp.alpha_power = 1.0f;
    ewp.alpha_base_weight = 1.0f;

    // mean / standard-deviation weighting is unused (radius 0)
    ewp.rgb_mean_weight = 0.0f;
    ewp.rgb_stdev_weight = 0.0f;
    ewp.rgb_mean_and_stdev_mixing = 0.0f;
    ewp.alpha_mean_weight = 0.0f;
    ewp.alpha_stdev_weight = 0.0f;
    ewp.mean_stdev_radius = 0;

    // alpha-dependent RGB scaling is off
    ewp.enable_rgb_scale_with_alpha = 0;
    ewp.alpha_radius = 0;

    // no block-artifact suppression, no normal-map angular scaling
    ewp.block_artifact_suppression = 0.0f;
    ewp.ra_normal_angular_scale = 0;
}
// Fill in the search-effort fields of ewp for a speed/quality preset and a
// 2D block size.
//
// NOTE the parameter order: the block height (ydim) comes BEFORE the block
// width (xdim).
//
// Fields written: max_refinement_iters, block_mode_cutoff,
// partition_1_to_2_limit, lowest_correlation_cutoff, partition_search_limit
// and texel_avg_error_limit; finally expand_block_artifact_suppression is
// run for the block size. A mode that matches none of the presets keeps the
// zero defaults below (the partition limit is then clamped up to 1).
void setup_ewp(ASTC_COMPRESS_MODE mode, int ydim, int xdim, error_weighting_params& ewp)
{
    int partitions_to_test = -1;    // partition search limit, clamped further down
    float oplimit = 0.0f;           // -> partition_1_to_2_limit
    float mincorrel = 0.0f;         // -> lowest_correlation_cutoff
    float dblimit_2d = 0.0f;        // dB figure, converted to an error limit below
    float bmc_percent = 0.0f;       // block-mode cutoff, in percent
    int maxiters = 0;               // -> max_refinement_iters

    // the dB limit of most presets depends on the texel count of the block
    const float log10_texels_2d = log((float)(xdim * ydim)) / log(10.0f);

    switch (mode)
    {
    case ASTC_COMPRESS_VERY_FAST:
        partitions_to_test = 2;
        oplimit = 1.0f;
        mincorrel = 0.5f;
        dblimit_2d = MAX(70 - 35 * log10_texels_2d, 53 - 19 * log10_texels_2d);
        bmc_percent = 25;
        maxiters = 1;
        break;

    case ASTC_COMPRESS_FAST:
        partitions_to_test = 4;
        oplimit = 1.0f;
        mincorrel = 0.5f;
        dblimit_2d = MAX(85 - 35 * log10_texels_2d, 63 - 19 * log10_texels_2d);
        bmc_percent = 50;
        maxiters = 1;
        break;

    case ASTC_COMPRESS_MEDIUM:
        partitions_to_test = 25;
        oplimit = 1.2f;
        mincorrel = 0.75f;
        dblimit_2d = MAX(95 - 35 * log10_texels_2d, 70 - 19 * log10_texels_2d);
        bmc_percent = 75;
        maxiters = 2;
        break;

    case ASTC_COMPRESS_THOROUGH:
        partitions_to_test = 100;
        oplimit = 2.5f;
        mincorrel = 0.95f;
        dblimit_2d = MAX(105 - 35 * log10_texels_2d, 77 - 19 * log10_texels_2d);
        bmc_percent = 95;
        maxiters = 4;
        break;

    case ASTC_COMPRESS_EXHAUSTIVE:
        partitions_to_test = PARTITION_COUNT;
        oplimit = 1000.0f;
        mincorrel = 0.99f;
        dblimit_2d = 999.0f;
        bmc_percent = 100;
        maxiters = 4;
        break;

    default:
        break;
    }

    ewp.max_refinement_iters = maxiters;
    ewp.block_mode_cutoff = bmc_percent / 100.0f;

    // When HDR RGB is forced the error limit is zero; otherwise the dB
    // figure is converted into a squared error on the 16-bit scale.
    float texel_avg_error_limit_2d;
    if (rgb_force_use_of_hdr == 0)
    {
        texel_avg_error_limit_2d = pow(0.1f, dblimit_2d * 0.1f) * 65535.0f * 65535.0f;
    }
    else
    {
        texel_avg_error_limit_2d = 0.0f;
    }

    ewp.partition_1_to_2_limit = oplimit;
    ewp.lowest_correlation_cutoff = mincorrel;

    // keep the search limit within 1..PARTITION_COUNT
    if (partitions_to_test < 1)
        partitions_to_test = 1;
    else if (partitions_to_test > PARTITION_COUNT)
        partitions_to_test = PARTITION_COUNT;
    ewp.partition_search_limit = partitions_to_test;

    ewp.texel_avg_error_limit = texel_avg_error_limit_2d;

    expand_block_artifact_suppression(xdim, ydim, 1, &ewp);
}
}
// Size in bytes of the ASTC data for a w x h texel image compressed with
// bw x bh texel blocks.
//
// Arguments are the image dimensions first, then the block dimensions.
// Partial blocks at the right and bottom edges count as whole blocks, and
// every block occupies 16 bytes. The multiplication is carried out in
// size_t so that large images do not overflow int.
size_t astc_compressed_size(int w, int h, int bw, int bh)
{
    const size_t blocks_across = static_cast<size_t>((w + bw - 1) / bw);
    const size_t blocks_down = static_cast<size_t>((h + bh - 1) / bh);
    const size_t bytes_per_block = 16;
    return blocks_across * blocks_down * bytes_per_block;
}
// Compress an 8-bit, 4-bytes-per-pixel source image into ASTC blocks.
//
// src_width, src_height       image size in texels
// src_data                    first byte of the source image
// src_channels                byte order of the source pixels (selects a swizzle)
// src_stride                  bytes per source row; 0 means tightly packed
//                             (src_width * 4)
// block_width, block_height   ASTC block footprint in texels
// compress_mode               speed/quality preset
// decode_mode                 passed to the encoder as its astc_decode_mode
// dst_data                    receives 16 bytes per block, in row-major
//                             block order
void astc_compress
(
    int src_width,
    int src_height,
    const uint8_t* src_data,
    ASTC_CHANNELS src_channels,
    int src_stride,
    int block_width,
    int block_height,
    ASTC_COMPRESS_MODE compress_mode,
    ASTC_DECODE_MODE decode_mode,
    uint8_t* dst_data
)
{
    init_tables();

    error_weighting_params ewp;
    init_ewp(ewp);
    // setup_ewp takes the block height (ydim) before the block width (xdim)
    setup_ewp(compress_mode, block_height, block_width, ewp);

    if (src_stride == 0)
        src_stride = src_width * 4;

    encode_astc
    (
        src_data,
        src_stride,
        k_swizzles[src_channels],
        src_width, src_height,
        block_width, block_height,
        &ewp,
        (astc_decode_mode) decode_mode,
        dst_data
    );
}
namespace
{
// More direct version of the astc_codec_image routine, which operates on a
// more conventional 2D image layout.
void from_imageblock(int xdim, int ydim, const imageblock* pb, bool srgb, swizzlepattern swz, uint8_t* dst_data, int dst_stride)
{
const float* fptr = pb->orig_data;
const uint8_t* nptr = pb->nan_texel;
for (int y = 0; y < ydim; y++)
{
for (int x = 0; x < xdim; x++)
{
if (*nptr)
{
// NaN-pixel, but we can't display it. Display purple instead.
dst_data[4 * x + swz.r] = 0xFF;
dst_data[4 * x + swz.g] = 0x00;
dst_data[4 * x + swz.b] = 0xFF;
dst_data[4 * x + swz.a] = 0xFF;
}
else
{
float r = fptr[0];
float g = fptr[1];
float b = fptr[2];
float a = fptr[3];
if (srgb)
{
if (r <= 0.0031308f)
r = r * 12.92f;
else if (r <= 1)
r = 1.055f * pow(r, (1.0f / 2.4f)) - 0.055f;
if (g <= 0.0031308f)
g = g * 12.92f;
else if (g <= 1)
g = 1.055f * pow(g, (1.0f / 2.4f)) - 0.055f;
if (b <= 0.0031308f)
b = b * 12.92f;
else if (b <= 1)
b = 1.055f * pow(b, (1.0f / 2.4f)) - 0.055f;
}
// clamp to [0,1]
if (r > 1.0f)
r = 1.0f;
if (g > 1.0f)
g = 1.0f;
if (b > 1.0f)
b = 1.0f;
if (a > 1.0f)
a = 1.0f;
// pack the data
dst_data[4 * x + swz.r] = uint8_t(floorf(r * 255.0f + 0.5f));
dst_data[4 * x + swz.g] = uint8_t(floorf(g * 255.0f + 0.5f));
dst_data[4 * x + swz.b] = uint8_t(floorf(b * 255.0f + 0.5f));
dst_data[4 * x + swz.a] = uint8_t(floorf(a * 255.0f + 0.5f));
}
fptr += 4;
nptr++;
}
dst_data += dst_stride;
}
}
}
void astc_decompress
(
const uint8_t* src_data,
int xdim,
int ydim,
ASTC_DECODE_MODE decode_mode,
int xsize,
int ysize,
uint8_t* dst_data,
ASTC_CHANNELS dst_channels,
int dst_stride
)
{
init_tables();
int xblocks = (xsize + xdim - 1) / xdim;
int yblocks = (ysize + ydim - 1) / ydim;
if (dst_stride == 0)
dst_stride = 4 * xsize;
imageblock pb;
for (int y = 0; y < yblocks; y++)
{
int ypos = y * ydim;
int clamp_ydim = MIN(ysize - ypos, ydim);
uint8_t* dst_row = dst_data + ypos * dst_stride;
for (int x = 0; x < xblocks; x++)
{
int xpos = x * xdim;
int clamp_xdim = MIN(xsize - xpos, xdim);
physical_compressed_block pcb = *(const physical_compressed_block *) src_data;
symbolic_compressed_block scb;
physical_to_symbolic(xdim, ydim, 1, pcb, &scb);
decompress_symbolic_block((astc_decode_mode) decode_mode, xdim, ydim, 1, xpos, ypos, 0, &scb, &pb);
from_imageblock(clamp_xdim, clamp_ydim, &pb, decode_mode == ASTC_DECODE_LDR_SRGB, k_swizzles[dst_channels], dst_row + xpos * 4, dst_stride);
src_data += 16;
}
}
}
// Relevant astc source files. These aren't set up for a bulk build yet though.
#ifdef DISABLED
#include "astc_block_sizes2.cpp"
#include "astc_color_quantize.cpp"
#include "astc_color_unquantize.cpp"
#include "astc_compress_symbolic.cpp"
#include "astc_compute_variance.cpp"
#include "astc_decompress_symbolic.cpp"
#include "astc_encoding_choice_error.cpp"
#include "astc_find_best_partitioning.cpp"
#include "astc_ideal_endpoints_and_weights.cpp"
#include "astc_imageblock.cpp"
#include "astc_integer_sequence.cpp"
#include "astc_kmeans_partitioning.cpp"
#include "astc_partition_tables.cpp"
#include "astc_percentile_tables.cpp"
#include "astc_pick_best_endpoint_format.cpp"
#include "astc_quantization.cpp"
#include "astc_symbolic_physical.cpp"
#include "astc_weight_align.cpp"
#include "astc_weight_quant_xfer_tables.cpp"
#include "mathlib.cpp"
#include "softfloat.cpp"
#endif

73
3rdparty/astc/astc_lib.h vendored Normal file
View File

@@ -0,0 +1,73 @@
/*----------------------------------------------------------------------------*/
/**
* @author Andrew Willmott
*
* @brief Library api for astc codec, to be used as an alternative to astc_toplevel.cpp
*/
/*----------------------------------------------------------------------------*/
#ifndef ASTC_LIB_H
#define ASTC_LIB_H
#include <stdint.h>
#include <stdlib.h>
// Encoder presets, listed from the lowest to the highest search effort.
// Each preset selects a partition search limit, refinement iteration count
// and the other limits that setup_ewp (astc_lib.cpp) writes into the
// encoder's error_weighting_params.
enum ASTC_COMPRESS_MODE // Trade-off compression quality for speed
{
ASTC_COMPRESS_VERY_FAST,
ASTC_COMPRESS_FAST,
ASTC_COMPRESS_MEDIUM,
ASTC_COMPRESS_THOROUGH,
ASTC_COMPRESS_EXHAUSTIVE,
};
// How the texture is intended to be decoded.
// astc_lib.cpp casts these values directly to the codec's astc_decode_mode,
// so the enumerator order presumably has to match that enum -- confirm
// against astc_codec_internals.h before reordering.
// NOTE(review): astc_decompress writes 8-bit output for every mode,
// including ASTC_DECODE_HDR.
enum ASTC_DECODE_MODE
{
ASTC_DECODE_LDR_SRGB, // texture will be decompressed to 8-bit SRGB
ASTC_DECODE_LDR_LINEAR, // texture will be decompressed to 8-bit linear
ASTC_DECODE_HDR // texture will be decompressed to 16-bit linear
};
// Byte order of the 4-byte pixels passed to astc_compress or produced by
// astc_decompress. The value is used as an index into the swizzle table in
// astc_lib.cpp, so new entries must be added there in the same order.
enum ASTC_CHANNELS
{
ASTC_RGBA,
ASTC_BGRA
};
size_t astc_compressed_size(int width, int height, int block_width, int block_height);
//!< Returns size in bytes of the compressed data for a width x height source image, assuming the given block size.
//!< Arguments are the image dimensions first, then the block dimensions (this is the order the definition uses).
//!< Partial blocks at the image edges count as whole blocks; each block takes 16 bytes.
// src_width, src_height      source image size in texels
// src_data                   8-bit source pixels, 4 bytes per pixel
// src_channels               byte order of the source pixels
// src_stride                 bytes per source row; 0 means src_width * 4
// block_width, block_height  ASTC block footprint in texels
// compress_mode              speed/quality preset
// decode_mode                how the texture is intended to be decoded
// dst_data                   output buffer; receives 16 bytes per block
void astc_compress
(
int src_width,
int src_height,
const uint8_t* src_data,
ASTC_CHANNELS src_channels,
int src_stride,
int block_width,
int block_height,
ASTC_COMPRESS_MODE compress_mode,
ASTC_DECODE_MODE decode_mode,
uint8_t* dst_data
);
//!< Compress 8-bit rgba source image into dst_data (expected to be of size astc_compressed_size(...))
// src_data                   compressed blocks, 16 bytes per block
// block_width, block_height  ASTC block footprint in texels
// decode_mode                decode mode; ASTC_DECODE_LDR_SRGB also converts
//                            r, g, b from linear to sRGB on output
// dst_width, dst_height      destination image size in texels
// dst_data                   8-bit destination pixels, 4 bytes per pixel
// dst_channels               byte order of the destination pixels
// dst_stride                 bytes per destination row; 0 means 4 * dst_width
void astc_decompress
(
const uint8_t* src_data,
int block_width,
int block_height,
ASTC_DECODE_MODE decode_mode,
int dst_width,
int dst_height,
uint8_t* dst_data,
ASTC_CHANNELS dst_channels,
int dst_stride
);
//!< Decompress astc source image into 8-bit rgba destination image.
#endif

323
3rdparty/astc/astc_partition_tables.cpp vendored Normal file
View File

@@ -0,0 +1,323 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Functions to generate partition tables for ASTC.
*
* We generate tables only for the block sizes that have actually been
* specified to the codec.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
// Cache of generated partition tables, one slot per block size.
// The slot index is xdim + 16 * ydim + 256 * zdim. A NULL slot means the
// tables for that block size have not been generated yet. A filled slot
// points to an array of 5 pointers indexed by partition count (entry 0 is
// NULL, entry 1 is a single partition_info, entries 2..4 are arrays of 1024).
static partition_info **partition_tables[4096];
/*
    Produce a canonicalized representation of a partition pattern.

    Partition labels are renumbered in order of first appearance (the first
    label seen becomes 0, the next new one 1, and so on), so two patterns
    that differ only in how their partitions are numbered produce the same
    output. Each texel takes 2 bits, 32 texels per uint64_t word.

    The largest possible such representation is 432 bits, equal to 7 uint64_t values.
*/
static void gen_canonicalized_partition_table(int texel_count, const uint8_t * partition_table, uint64_t canonicalized[7])
{
    for (int word = 0; word < 7; word++)
        canonicalized[word] = 0;

    // renumbering of the original labels; -1 = label not seen yet
    int relabel[4] = { -1, -1, -1, -1 };
    int labels_seen = 0;

    for (int texel = 0; texel < texel_count; texel++)
    {
        const int original_label = partition_table[texel];
        if (relabel[original_label] == -1)
        {
            relabel[original_label] = labels_seen;
            labels_seen++;
        }

        const uint64_t packed_label = relabel[original_label];
        const int word = texel / 32;
        const int bit_offset = 2 * (texel % 32);
        canonicalized[word] |= packed_label << bit_offset;
    }
}
// Equality test for two canonicalized partition patterns.
// Returns 1 when all 7 words match, 0 as soon as any word differs.
static int compare_canonicalized_partition_tables(const uint64_t part1[7], const uint64_t part2[7])
{
    for (int word = 0; word < 7; word++)
    {
        if (part1[word] != part2[word])
            return 0;
    }
    return 1;
}
/*
    For a partition table, detect partitionings that are equivalent, then mark
    them as invalid. This reduces the number of partitions that the codec has
    to consider and thus improves encode performance.

    pi points to PARTITION_COUNT entries for a block of xdim x ydim x zdim
    texels. Every entry whose canonicalized pattern equals that of an earlier
    entry gets its partition_count set to 0; the first occurrence is kept.
*/
static void partition_table_zap_equal_elements(int xdim, int ydim, int zdim, partition_info * pi)
{
    const int texel_count = xdim * ydim * zdim;

    // 7 words of canonical pattern per table entry
    uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];
    for (int i = 0; i < PARTITION_COUNT; i++)
    {
        gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
    }

    for (int i = 0; i < PARTITION_COUNT; i++)
    {
        for (int j = 0; j < i; j++)
        {
            if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
            {
                // duplicate of an earlier entry: mark it unusable
                pi[i].partition_count = 0;
                break;
            }
        }
    }

    delete[] canonicalizeds;
}
// 32-bit integer mixing function used to derive the partition seeds.
// All arithmetic wraps modulo 2^32; an input of 0 yields 0.
uint32_t hash52(uint32_t inp)
{
    uint32_t h = inp;

    h = h ^ (h >> 15);
    // 0xEEDE0891 == 2^32 - (2^4+1)*(2^7+1)*(2^17-1)
    h = h * 0xEEDE0891u;
    h = h ^ (h >> 5);
    h = h + (h << 16);
    h = h ^ (h >> 7);
    h = h ^ (h >> 3);
    h = h ^ (h << 6);
    h = h ^ (h >> 17);

    return h;
}
// Decide which partition (0..3) the texel at (x, y, z) belongs to for the
// partitioning identified by seed and partitioncount.
// small_block: when non-zero the texel coordinates are doubled before use.
// NOTE(review): this looks like the partition pattern generator of the ASTC
// format; if so the arithmetic must stay bit-exact -- confirm against the
// specification before changing anything here.
int select_partition(int seed, int x, int y, int z, int partitioncount, int small_block)
{
if (small_block)
{
x <<= 1;
y <<= 1;
z <<= 1;
}
// fold the partition count into the seed, then hash it
seed += (partitioncount - 1) * 1024;
uint32_t rnum = hash52(seed);
// twelve 4-bit fields taken from the hash (some of them overlap)
uint8_t seed1 = rnum & 0xF;
uint8_t seed2 = (rnum >> 4) & 0xF;
uint8_t seed3 = (rnum >> 8) & 0xF;
uint8_t seed4 = (rnum >> 12) & 0xF;
uint8_t seed5 = (rnum >> 16) & 0xF;
uint8_t seed6 = (rnum >> 20) & 0xF;
uint8_t seed7 = (rnum >> 24) & 0xF;
uint8_t seed8 = (rnum >> 28) & 0xF;
uint8_t seed9 = (rnum >> 18) & 0xF;
uint8_t seed10 = (rnum >> 22) & 0xF;
uint8_t seed11 = (rnum >> 26) & 0xF;
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
// squaring all the seeds in order to bias their distribution
// towards lower values.
// (the largest possible square is 15 * 15 = 225, which still fits in uint8_t)
seed1 *= seed1;
seed2 *= seed2;
seed3 *= seed3;
seed4 *= seed4;
seed5 *= seed5;
seed6 *= seed6;
seed7 *= seed7;
seed8 *= seed8;
seed9 *= seed9;
seed10 *= seed10;
seed11 *= seed11;
seed12 *= seed12;
// shift amounts depend on the low seed bits and on whether three
// partitions were requested; note that seed here already includes the
// partition-count offset added above
int sh1, sh2, sh3;
if (seed & 1)
{
sh1 = (seed & 2 ? 4 : 5);
sh2 = (partitioncount == 3 ? 6 : 5);
}
else
{
sh1 = (partitioncount == 3 ? 6 : 5);
sh2 = (seed & 2 ? 4 : 5);
}
sh3 = (seed & 0x10) ? sh1 : sh2;
// odd-numbered seeds 1..7 use sh1, even-numbered 2..8 use sh2, 9..12 use sh3
seed1 >>= sh1;
seed2 >>= sh2;
seed3 >>= sh1;
seed4 >>= sh2;
seed5 >>= sh1;
seed6 >>= sh2;
seed7 >>= sh1;
seed8 >>= sh2;
seed9 >>= sh3;
seed10 >>= sh3;
seed11 >>= sh3;
seed12 >>= sh3;
// one linear function of the texel position per candidate partition,
// each offset by a different slice of the hash
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
// apply the saw
// (only the low 6 bits of each value are kept)
a &= 0x3F;
b &= 0x3F;
c &= 0x3F;
d &= 0x3F;
// remove some of the components if we are to output < 4 partitions.
if (partitioncount <= 3)
d = 0;
if (partitioncount <= 2)
c = 0;
if (partitioncount <= 1)
b = 0;
// the largest value wins; on a tie the lowest-numbered partition is chosen
int partition;
if (a >= b && a >= c && a >= d)
partition = 0;
else if (b >= c && b >= d)
partition = 1;
else if (c >= d)
partition = 2;
else
partition = 3;
return partition;
}
// Fill *pt for the partitioning identified by (partition_count,
// partition_index) on a block of xdim x ydim x zdim texels.
//
// Writes: partition_of_texel (x fastest, then y, then z),
// texels_of_partition / texels_per_partition (the texel lists and their
// lengths), partition_count (the index of the first EMPTY partition, or 4
// when all four are populated -- this may be smaller than the requested
// count), and coverage_bitmaps (one bit per texel listed in the block size
// descriptor's texels_for_bitmap_partitioning).
void generate_one_partition_table(int xdim, int ydim, int zdim, int partition_count, int partition_index, partition_info * pt)
{
    const int texels_per_block = xdim * ydim * zdim;
    const int small_block = texels_per_block < 32;

    // label every texel
    uint8_t *label_out = pt->partition_of_texel;
    for (int z = 0; z < zdim; z++)
    {
        for (int y = 0; y < ydim; y++)
        {
            for (int x = 0; x < xdim; x++)
            {
                const uint8_t label = select_partition(partition_index, x, y, z, partition_count, small_block);
                *label_out = label;
                label_out++;
            }
        }
    }

    // gather the texel list of each partition
    int counts[4] = { 0, 0, 0, 0 };
    for (int texel = 0; texel < texels_per_block; texel++)
    {
        const int label = pt->partition_of_texel[texel];
        pt->texels_of_partition[label][counts[label]] = texel;
        counts[label]++;
    }

    for (int p = 0; p < 4; p++)
    {
        pt->texels_per_partition[p] = counts[p];
        pt->coverage_bitmaps[p] = 0ULL;
    }

    // effective partition count: stop at the first partition with no texels
    int populated = 0;
    while (populated < 4 && counts[populated] != 0)
        populated++;
    pt->partition_count = populated;

    // coverage bitmaps over the texels used for bitmap partitioning
    const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
    const int bitmap_texels = bsd->texelcount_for_bitmap_partitioning;
    for (int bit = 0; bit < bitmap_texels; bit++)
    {
        const int texel = bsd->texels_for_bitmap_partitioning[bit];
        pt->coverage_bitmaps[pt->partition_of_texel[texel]] |= 1ULL << bit;
    }
}
static void generate_partition_tables(int xdim, int ydim, int zdim)
{
int i;
partition_info *one_partition = new partition_info;
partition_info *two_partitions = new partition_info[1024];
partition_info *three_partitions = new partition_info[1024];
partition_info *four_partitions = new partition_info[1024];
partition_info **partition_table = new partition_info *[5];
partition_table[0] = NULL;
partition_table[1] = one_partition;
partition_table[2] = two_partitions;
partition_table[3] = three_partitions;
partition_table[4] = four_partitions;
generate_one_partition_table(xdim, ydim, zdim, 1, 0, one_partition);
for (i = 0; i < 1024; i++)
{
generate_one_partition_table(xdim, ydim, zdim, 2, i, two_partitions + i);
generate_one_partition_table(xdim, ydim, zdim, 3, i, three_partitions + i);
generate_one_partition_table(xdim, ydim, zdim, 4, i, four_partitions + i);
}
partition_table_zap_equal_elements(xdim, ydim, zdim, two_partitions);
partition_table_zap_equal_elements(xdim, ydim, zdim, three_partitions);
partition_table_zap_equal_elements(xdim, ydim, zdim, four_partitions);
partition_tables[xdim + 16 * ydim + 256 * zdim] = partition_table;
}
// Return the partition table for the given block size and partition count,
// generating the tables for that block size on first use.
// The result is whatever the cached 5-entry array holds at index
// partition_count (index 0 holds NULL). Neither the block dimensions nor
// partition_count are range-checked here.
const partition_info *get_partition_table(int xdim, int ydim, int zdim, int partition_count)
{
    const int slot = xdim + 16 * ydim + 256 * zdim;

    if (!partition_tables[slot])
        generate_partition_tables(xdim, ydim, zdim);

    partition_info **by_count = partition_tables[slot];
    return by_count[partition_count];
}

4768
3rdparty/astc/astc_percentile_tables.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,938 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Functions to pick the best ASTC endpoint format for a given block.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
#ifdef DEBUG_PRINT_DIAGNOSTICS
#include <stdio.h>
#endif
#include <math.h>
/*
functions to determine, for a given partitioning, which color endpoint formats are the best to use.
*/
// for a given partition, compute for every (integer-component-count, quantization-level)
// the color error.
static void compute_color_error_for_every_integer_count_and_quantization_level(int encode_hdr_rgb, // 1 = perform HDR encoding, 0 = perform LDR encoding.
int encode_hdr_alpha, int partition_index, const partition_info * pi,
const encoding_choice_errors * eci, // pointer to the structure for the CURRENT partition.
const endpoints * ep, float4 error_weightings[4],
// arrays to return results back through.
float best_error[21][4], int format_of_choice[21][4])
{
int i, j;
int partition_size = pi->texels_per_partition[partition_index];
static const float baseline_quant_error[21] = {
(65536.0f * 65536.0f / 18.0f), // 2 values, 1 step
(65536.0f * 65536.0f / 18.0f) / (2 * 2), // 3 values, 2 steps
(65536.0f * 65536.0f / 18.0f) / (3 * 3), // 4 values, 3 steps
(65536.0f * 65536.0f / 18.0f) / (4 * 4), // 5 values
(65536.0f * 65536.0f / 18.0f) / (5 * 5),
(65536.0f * 65536.0f / 18.0f) / (7 * 7),
(65536.0f * 65536.0f / 18.0f) / (9 * 9),
(65536.0f * 65536.0f / 18.0f) / (11 * 11),
(65536.0f * 65536.0f / 18.0f) / (15 * 15),
(65536.0f * 65536.0f / 18.0f) / (19 * 19),
(65536.0f * 65536.0f / 18.0f) / (23 * 23),
(65536.0f * 65536.0f / 18.0f) / (31 * 31),
(65536.0f * 65536.0f / 18.0f) / (39 * 39),
(65536.0f * 65536.0f / 18.0f) / (47 * 47),
(65536.0f * 65536.0f / 18.0f) / (63 * 63),
(65536.0f * 65536.0f / 18.0f) / (79 * 79),
(65536.0f * 65536.0f / 18.0f) / (95 * 95),
(65536.0f * 65536.0f / 18.0f) / (127 * 127),
(65536.0f * 65536.0f / 18.0f) / (159 * 159),
(65536.0f * 65536.0f / 18.0f) / (191 * 191),
(65536.0f * 65536.0f / 18.0f) / (255 * 255)
};
float4 ep0 = ep->endpt0[partition_index];
float4 ep1 = ep->endpt1[partition_index];
float ep0_max = MAX(MAX(ep0.x, ep0.y), ep0.z);
float ep0_min = MIN(MIN(ep0.x, ep0.y), ep0.z);
float ep1_max = MAX(MAX(ep1.x, ep1.y), ep1.z);
float ep1_min = MIN(MIN(ep1.x, ep1.y), ep1.z);
ep0_min = MAX(ep0_min, 0.0f);
ep1_min = MAX(ep1_min, 0.0f);
ep0_max = MAX(ep0_max, 1e-10f);
ep1_max = MAX(ep1_max, 1e-10f);
float4 error_weight = error_weightings[partition_index];
float error_weight_rgbsum = error_weight.x + error_weight.y + error_weight.z;
float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f;
float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f;
// it is possible to get endpoint colors significantly outside [0,upper-limit]
// even if the input data are safely contained in [0,upper-limit];
// we need to add an error term for this situation,
float4 ep0_range_error_high;
float4 ep1_range_error_high;
float4 ep0_range_error_low;
float4 ep1_range_error_low;
ep0_range_error_high.x = MAX(0.0f, ep0.x - range_upper_limit_rgb);
ep0_range_error_high.y = MAX(0.0f, ep0.y - range_upper_limit_rgb);
ep0_range_error_high.z = MAX(0.0f, ep0.z - range_upper_limit_rgb);
ep0_range_error_high.w = MAX(0.0f, ep0.w - range_upper_limit_alpha);
ep1_range_error_high.x = MAX(0.0f, ep1.x - range_upper_limit_rgb);
ep1_range_error_high.y = MAX(0.0f, ep1.y - range_upper_limit_rgb);
ep1_range_error_high.z = MAX(0.0f, ep1.z - range_upper_limit_rgb);
ep1_range_error_high.w = MAX(0.0f, ep1.w - range_upper_limit_alpha);
ep0_range_error_low.x = MIN(0.0f, ep0.x);
ep0_range_error_low.y = MIN(0.0f, ep0.y);
ep0_range_error_low.z = MIN(0.0f, ep0.z);
ep0_range_error_low.w = MIN(0.0f, ep0.w);
ep1_range_error_low.x = MIN(0.0f, ep1.x);
ep1_range_error_low.y = MIN(0.0f, ep1.y);
ep1_range_error_low.z = MIN(0.0f, ep1.z);
ep1_range_error_low.w = MIN(0.0f, ep1.w);
float4 sum_range_error =
(ep0_range_error_low * ep0_range_error_low) + (ep1_range_error_low * ep1_range_error_low) + (ep0_range_error_high * ep0_range_error_high) + (ep1_range_error_high * ep1_range_error_high);
float rgb_range_error = dot(sum_range_error.xyz, error_weight.xyz) * 0.5f * partition_size;
float alpha_range_error = sum_range_error.w * error_weight.w * 0.5f * partition_size;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
{
printf("%s : partition=%d\nrgb-error_wt=%f alpha_error_wt=%f\n", __func__, partition_index, error_weight_rgbsum, error_weight.w);
printf("ep0 = %f %f %f %f\n", ep0.x, ep0.y, ep0.z, ep0.w);
printf("ep1 = %f %f %f %f\n", ep1.x, ep1.y, ep1.z, ep1.w);
printf("rgb_range_error = %f, alpha_range_error = %f\n", rgb_range_error, alpha_range_error);
printf("rgb-luma-error: %f\n", eci->rgb_luma_error);
}
#endif
if (encode_hdr_rgb)
{
// collect some statistics
float af, cf;
if (ep1.x > ep1.y && ep1.x > ep1.z)
{
af = ep1.x;
cf = ep1.x - ep0.x;
}
else if (ep1.y > ep1.z)
{
af = ep1.y;
cf = ep1.y - ep0.y;
}
else
{
af = ep1.z;
cf = ep1.z - ep0.z;
}
float bf = af - ep1_min; // estimate of color-component spread in high endpoint color
float3 prd = ep1.xyz - float3(cf, cf, cf);
float3 pdif = prd - ep0.xyz;
// estimate of color-component spread in low endpoint color
float df = MAX(MAX(fabs(pdif.x), fabs(pdif.y)), fabs(pdif.z));
int b = (int)bf;
int c = (int)cf;
int d = (int)df;
// determine which one of the 6 submodes is likely to be used in
// case of an RGBO-mode
int rgbo_mode = 5; // 7 bits per component
// mode 4: 8 7 6
if (b < 32768 && c < 16384)
rgbo_mode = 4;
// mode 3: 9 6 7
if (b < 8192 && c < 16384)
rgbo_mode = 3;
// mode 2: 10 5 8
if (b < 2048 && c < 16384)
rgbo_mode = 2;
// mode 1: 11 6 5
if (b < 2048 && c < 1024)
rgbo_mode = 1;
// mode 0: 11 5 7
if (b < 1024 && c < 4096)
rgbo_mode = 0;
// determine which one of the 9 submodes is likely to be used in
// case of an RGB-mode.
int rgb_mode = 8; // 8 bits per component, except 7 bits for blue
// mode 0: 9 7 6 7
if (b < 16384 && c < 8192 && d < 8192)
rgb_mode = 0;
// mode 1: 9 8 6 6
if (b < 32768 && c < 8192 && d < 4096)
rgb_mode = 1;
// mode 2: 10 6 7 7
if (b < 4096 && c < 8192 && d < 4096)
rgb_mode = 2;
// mode 3: 10 7 7 6
if (b < 8192 && c < 8192 && d < 2048)
rgb_mode = 3;
// mode 4: 11 8 6 5
if (b < 8192 && c < 2048 && d < 512)
rgb_mode = 4;
// mode 5: 11 6 8 6
if (b < 2048 && c < 8192 && d < 1024)
rgb_mode = 5;
// mode 6: 12 7 7 5
if (b < 2048 && c < 2048 && d < 256)
rgb_mode = 6;
// mode 7: 12 6 7 6
if (b < 1024 && c < 2048 && d < 512)
rgb_mode = 7;
static const float rgbo_error_scales[6] = { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f };
static const float rgb_error_scales[9] = { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f };
float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // empirically determined ....
float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // empirically determined ....
float lum_high = (ep1.x + ep1.y + ep1.z) * (1.0f / 3.0f);
float lum_low = (ep0.x + ep0.y + ep0.z) * (1.0f / 3.0f);
float lumdif = lum_high - lum_low;
float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f;
mode23mult *= 0.0005f; // empirically determined ....
// pick among the available HDR endpoint modes
for (i = 0; i < 8; i++)
{
best_error[i][3] = 1e30f;
format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
best_error[i][2] = 1e30f;
format_of_choice[i][2] = FMT_HDR_RGB;
best_error[i][1] = 1e30f;
format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
best_error[i][0] = 1e30f;
format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
}
for (i = 8; i < 21; i++)
{
// base_quant_error should depend on the scale-factor that would be used
// during actual encode of the color value.
float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
float alpha_quantization_error = error_weight.w * base_quant_error * 2.0f;
float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
#endif
// for 8 integers, we have two encodings: one with HDR alpha and another one
// with LDR alpha.
float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error;
best_error[i][3] = full_hdr_rgba_error;
format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
// for 6 integers, we have one HDR-RGB encoding
float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci->alpha_drop_error;
best_error[i][2] = full_hdr_rgb_error;
format_of_choice[i][2] = FMT_HDR_RGB;
// for 4 integers, we have one HDR-RGB-Scale encoding
float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci->alpha_drop_error + eci->rgb_luma_error;
best_error[i][1] = hdr_rgb_scale_error;
format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
// for 2 integers, we assume luminance-with-large-range
float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci->alpha_drop_error + eci->luminance_error;
best_error[i][0] = hdr_luminance_error;
format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
{
for (j = 0; j < 4; j++)
{
printf("(hdr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
}
}
#endif
}
}
else
{
for (i = 0; i < 4; i++)
{
best_error[i][3] = 1e30f;
best_error[i][2] = 1e30f;
best_error[i][1] = 1e30f;
best_error[i][0] = 1e30f;
format_of_choice[i][3] = FMT_RGBA;
format_of_choice[i][2] = FMT_RGB;
format_of_choice[i][1] = FMT_RGB_SCALE;
format_of_choice[i][0] = FMT_LUMINANCE;
}
// pick among the available LDR endpoint modes
for (i = 4; i < 21; i++)
{
float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
float rgb_quantization_error = error_weight_rgbsum * base_quant_error;
float alpha_quantization_error = error_weight.w * base_quant_error;
float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
#endif
// for 8 integers, the available encodings are:
// full LDR RGB-Alpha
float full_ldr_rgba_error = rgba_quantization_error;
if (eci->can_blue_contract)
full_ldr_rgba_error *= 0.625f;
if (eci->can_offset_encode && i <= 18)
full_ldr_rgba_error *= 0.5f;
full_ldr_rgba_error += rgb_range_error + alpha_range_error;
best_error[i][3] = full_ldr_rgba_error;
format_of_choice[i][3] = FMT_RGBA;
// for 6 integers, we have:
// - an LDR-RGB encoding
// - an RGBS + Alpha encoding (LDR)
float full_ldr_rgb_error = rgb_quantization_error;
if (eci->can_blue_contract)
full_ldr_rgb_error *= 0.5f;
if (eci->can_offset_encode && i <= 18)
full_ldr_rgb_error *= 0.25f;
full_ldr_rgb_error += eci->alpha_drop_error + rgb_range_error;
float rgbs_alpha_error = rgba_quantization_error + eci->rgb_scale_error + rgb_range_error + alpha_range_error;
if (rgbs_alpha_error < full_ldr_rgb_error)
{
best_error[i][2] = rgbs_alpha_error;
format_of_choice[i][2] = FMT_RGB_SCALE_ALPHA;
}
else
{
best_error[i][2] = full_ldr_rgb_error;
format_of_choice[i][2] = FMT_RGB;
}
// for 4 integers, we have a Luminance-Alpha encoding and the RGBS encoding
float ldr_rgbs_error = rgb_quantization_error + eci->alpha_drop_error + eci->rgb_scale_error + rgb_range_error;
float lum_alpha_error = rgba_quantization_error + eci->luminance_error + rgb_range_error + alpha_range_error;
if (ldr_rgbs_error < lum_alpha_error)
{
best_error[i][1] = ldr_rgbs_error;
format_of_choice[i][1] = FMT_RGB_SCALE;
}
else
{
best_error[i][1] = lum_alpha_error;
format_of_choice[i][1] = FMT_LUMINANCE_ALPHA;
}
// for 2 integers, we have a Luminance-encoding and an Alpha-encoding.
float luminance_error = rgb_quantization_error + eci->alpha_drop_error + eci->luminance_error + rgb_range_error;
best_error[i][0] = luminance_error;
format_of_choice[i][0] = FMT_LUMINANCE;
#ifdef DEBUG_PRINT_DIAGNOSTICS
if (print_diagnostics)
{
for (j = 0; j < 4; j++)
{
printf(" (ldr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
}
}
#endif
}
}
}
// Single-partition case: given a bit budget for the color endpoints, choose
// the endpoint integer count (and with it a quantization level and an endpoint
// format) that has the lowest estimated error.
//
//   combined_best_error        estimated error, indexed [quant-level][slot];
//                              slot s is looked up in row s + 1 of
//                              quantization_mode_table.
//   formats_of_choice          endpoint format recorded for each [quant-level][slot].
//   bits_available             column index into quantization_mode_table.
//   best_quantization_level    out: quantization level of the winning slot.
//   best_formats               out: endpoint format of the winning slot.
//   error_of_best_combination  out: error of the winning slot; stays at 1e20f
//                              when no slot was usable.
//
// When no slot is usable the level is read from row 0 of
// quantization_mode_table (expected to be negative -- TODO confirm against the
// table builder), in which case the format falls back to FMT_LUMINANCE.
static void one_partition_find_best_combination_for_bitcount(float combined_best_error[21][4],
	int formats_of_choice[21][4], int bits_available, int *best_quantization_level, int *best_formats, float *error_of_best_combination)
{
	int winning_slot = -1;
	float winning_error = 1e20f;

	for (int slot = 0; slot < 4; slot++)
	{
		// quantization level reachable with this many integers in this many bits
		const int level = quantization_mode_table[slot + 1][bits_available];

		// -1 marks "not enough bits for this endpoint format at all";
		// unlike the multi-partition variants, later slots are still tried.
		if (level != -1)
		{
			const float candidate = combined_best_error[level][slot];
			if (candidate < winning_error)
			{
				winning_error = candidate;
				winning_slot = slot;
			}
		}
	}

	const int ql = quantization_mode_table[winning_slot + 1][bits_available];

	*best_quantization_level = ql;
	*error_of_best_combination = winning_error;
	*best_formats = (ql >= 0) ? formats_of_choice[ql][winning_slot] : FMT_LUMINANCE;
}
// Two-partition case: for every quantization level and every total endpoint
// size, record the cheapest pairing of per-partition endpoint formats.
//
//   best_error           in:  per-partition error, indexed
//                             [partition][quant-level][integer-pair-count-minus-1].
//   format_of_choice     in:  endpoint format behind each best_error entry.
//   combined_best_error  out: summed error, indexed
//                             [quant-level][integer-pair-count-minus-2];
//                             entries that are never reached stay at 1e30f.
//   formats_of_choice    out: the two formats behind each reached
//                             combined_best_error entry (unreached entries are
//                             left unwritten).
//
// Only quantization levels 5..20 are evaluated, and the two partitions may
// differ by at most one step in their integer-pair count.
static void two_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[2][21][4],
	int format_of_choice[2][21][4],
	float combined_best_error[21][7],
	int formats_of_choice[21][7][2])
{
	// Start every slot at "worse than anything real".
	for (int level = 0; level < 21; level++)
		for (int slot = 0; slot < 7; slot++)
			combined_best_error[level][slot] = 1e30f;

	for (int level = 5; level < 21; level++)
	{
		for (int first = 0; first < 4; first++)			// size index of first endpoint-pair
		{
			for (int second = 0; second < 4; second++)	// size index of second endpoint-pair
			{
				const int smallest = (first < second) ? first : second;
				const int largest = (first > second) ? first : second;
				if (largest - smallest > 1)
					continue;

				// Clamp the sum so that "impossible" partitions (1e30f) cannot
				// blow the total up any further.
				const float total = best_error[0][level][first] + best_error[1][level][second];
				const float capped = (total < 1e10f) ? total : 1e10f;

				// "<=" on purpose: on a tie the combination visited last wins.
				const int slot = first + second;
				if (capped <= combined_best_error[level][slot])
				{
					combined_best_error[level][slot] = capped;
					formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
					formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
				}
			}
		}
	}
}
// Two-partition case: given a bit budget for the color endpoints, choose the
// total integer count (and with it a quantization level and two endpoint
// formats) that has the lowest estimated error.
//
//   combined_best_error          estimated error, indexed [quant-level][integer_count - 2].
//   formats_of_choice            the two endpoint formats behind each error entry.
//   bits_available               column index into quantization_mode_table.
//   best_quantization_level      out: level for the winning integer count.
//   best_quantization_level_mod  out: level for the same integer count looked
//                                up with two extra bits.
//                                NOTE(review): presumably the bits gained when
//                                both partitions share one format -- confirm.
//   best_formats                 out: two formats; FMT_LUMINANCE for both when
//                                the resulting level is negative.
//   error_of_best_combination    out: error of the winner; stays at 1e20f when
//                                nothing was usable.
static void two_partitions_find_best_combination_for_bitcount(float combined_best_error[21][7],
	int formats_of_choice[21][7][2],
	int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
{
	int winning_count = 0;			// 0 doubles as "nothing usable found"
	float winning_error = 1e20f;

	for (int count = 2; count <= 8; count++)
	{
		// quantization level reachable with this many integers in this many bits
		const int level = quantization_mode_table[count][bits_available];

		// -1: not enough bits for this count; the scan stops at the first
		// such count instead of trying larger ones.
		if (level == -1)
			break;

		const float candidate = combined_best_error[level][count - 2];
		if (candidate < winning_error)
		{
			winning_error = candidate;
			winning_count = count;
		}
	}

	const int ql = quantization_mode_table[winning_count][bits_available];
	const int ql_mod = quantization_mode_table[winning_count][bits_available + 2];

	*best_quantization_level = ql;
	*best_quantization_level_mod = ql_mod;
	*error_of_best_combination = winning_error;

	for (int part = 0; part < 2; part++)
		best_formats[part] = (ql >= 0) ? formats_of_choice[ql][winning_count - 2][part] : FMT_LUMINANCE;
}
// Three-partition case: for every quantization level and every total endpoint
// size, record the cheapest triple of per-partition endpoint formats.
//
//   best_error           in:  per-partition error, indexed
//                             [partition][quant-level][size index 0..3].
//   format_of_choice     in:  endpoint format behind each best_error entry.
//   combined_best_error  out: summed error, indexed [quant-level][sum of the
//                             three size indices]; unreached entries stay at 1e30f.
//   formats_of_choice    out: the three formats behind each reached entry
//                             (unreached entries are left unwritten).
//
// Only quantization levels 5..20 are evaluated, and the size indices of the
// three partitions may span at most one step.
static void three_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[3][21][4],
	int format_of_choice[3][21][4], float combined_best_error[21][10], int formats_of_choice[21][10][3])
{
	// Start every slot at "worse than anything real".
	for (int level = 0; level < 21; level++)
		for (int slot = 0; slot < 10; slot++)
			combined_best_error[level][slot] = 1e30f;

	for (int level = 5; level < 21; level++)
	{
		for (int first = 0; first < 4; first++)			// size index of first endpoint-pair
		{
			for (int second = 0; second < 4; second++)	// size index of second endpoint-pair
			{
				const int lo_pair = (first < second) ? first : second;
				const int hi_pair = (first > second) ? first : second;
				if (hi_pair - lo_pair > 1)
					continue;

				for (int third = 0; third < 4; third++)	// size index of third endpoint-pair
				{
					const int lo_all = (third < lo_pair) ? third : lo_pair;
					const int hi_all = (third > hi_pair) ? third : hi_pair;
					if (hi_all - lo_all > 1)
						continue;

					// Clamp the sum so that "impossible" partitions (1e30f)
					// cannot blow the total up any further.
					const float total = best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third];
					const float capped = (total < 1e10f) ? total : 1e10f;

					// "<=" on purpose: on a tie the combination visited last wins.
					const int slot = first + second + third;
					if (capped <= combined_best_error[level][slot])
					{
						combined_best_error[level][slot] = capped;
						formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
						formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
						formats_of_choice[level][slot][2] = format_of_choice[2][level][third];
					}
				}
			}
		}
	}
}
// Three-partition case: given a bit budget for the color endpoints, choose the
// total integer count (and with it a quantization level and three endpoint
// formats) that has the lowest estimated error.
//
//   combined_best_error          estimated error, indexed [quant-level][integer_count - 3].
//   formats_of_choice            the three endpoint formats behind each error entry.
//   bits_available               column index into quantization_mode_table.
//   best_quantization_level      out: level for the winning integer count.
//   best_quantization_level_mod  out: level for the same integer count looked
//                                up with five extra bits.
//                                NOTE(review): presumably the bits gained when
//                                all partitions share one format -- confirm.
//   best_formats                 out: three formats; FMT_LUMINANCE for all when
//                                the resulting level is negative.
//   error_of_best_combination    out: error of the winner; stays at 1e20f when
//                                nothing was usable.
static void three_partitions_find_best_combination_for_bitcount(float combined_best_error[21][10],
	int formats_of_choice[21][10][3],
	int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
{
	int winning_count = 0;			// 0 doubles as "nothing usable found"
	float winning_error = 1e20f;

	for (int count = 3; count <= 9; count++)
	{
		// quantization level reachable with this many integers in this many bits
		const int level = quantization_mode_table[count][bits_available];

		// -1: not enough bits for this count; the scan stops at the first
		// such count instead of trying larger ones.
		if (level == -1)
			break;

		const float candidate = combined_best_error[level][count - 3];
		if (candidate < winning_error)
		{
			winning_error = candidate;
			winning_count = count;
		}
	}

	const int ql = quantization_mode_table[winning_count][bits_available];
	const int ql_mod = quantization_mode_table[winning_count][bits_available + 5];

	*best_quantization_level = ql;
	*best_quantization_level_mod = ql_mod;
	*error_of_best_combination = winning_error;

	for (int part = 0; part < 3; part++)
		best_formats[part] = (ql >= 0) ? formats_of_choice[ql][winning_count - 3][part] : FMT_LUMINANCE;
}
// Four-partition case: for every quantization level and every total endpoint
// size, record the cheapest quadruple of per-partition endpoint formats.
//
//   best_error           in:  per-partition error, indexed
//                             [partition][quant-level][size index 0..3].
//   format_of_choice     in:  endpoint format behind each best_error entry.
//   combined_best_error  out: summed error, indexed [quant-level][sum of the
//                             four size indices]; unreached entries stay at 1e30f.
//   formats_of_choice    out: the four formats behind each reached entry
//                             (unreached entries are left unwritten).
//
// Only quantization levels 5..20 are evaluated, and the size indices of the
// four partitions may span at most one step.
static void four_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[4][21][4],
	int format_of_choice[4][21][4], float combined_best_error[21][13], int formats_of_choice[21][13][4])
{
	// Start every slot at "worse than anything real".
	for (int level = 0; level < 21; level++)
		for (int slot = 0; slot < 13; slot++)
			combined_best_error[level][slot] = 1e30f;

	for (int level = 5; level < 21; level++)
	{
		for (int first = 0; first < 4; first++)			// size index of first endpoint-pair
		{
			for (int second = 0; second < 4; second++)	// size index of second endpoint-pair
			{
				const int lo2 = (first < second) ? first : second;
				const int hi2 = (first > second) ? first : second;
				if (hi2 - lo2 > 1)
					continue;

				for (int third = 0; third < 4; third++)	// size index of third endpoint-pair
				{
					const int lo3 = (third < lo2) ? third : lo2;
					const int hi3 = (third > hi2) ? third : hi2;
					if (hi3 - lo3 > 1)
						continue;

					for (int fourth = 0; fourth < 4; fourth++)	// size index of fourth endpoint-pair
					{
						const int lo4 = (fourth < lo3) ? fourth : lo3;
						const int hi4 = (fourth > hi3) ? fourth : hi3;
						if (hi4 - lo4 > 1)
							continue;

						// Clamp the sum so that "impossible" partitions
						// (1e30f) cannot blow the total up any further.
						const float total = best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third] + best_error[3][level][fourth];
						const float capped = (total < 1e10f) ? total : 1e10f;

						// "<=" on purpose: on a tie the combination visited last wins.
						const int slot = first + second + third + fourth;
						if (capped <= combined_best_error[level][slot])
						{
							combined_best_error[level][slot] = capped;
							formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
							formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
							formats_of_choice[level][slot][2] = format_of_choice[2][level][third];
							formats_of_choice[level][slot][3] = format_of_choice[3][level][fourth];
						}
					}
				}
			}
		}
	}
}
// Four-partition case: given a bit budget for the color endpoints, choose the
// total integer count (and with it a quantization level and four endpoint
// formats) that has the lowest estimated error.
//
//   combined_best_error          estimated error, indexed [quant-level][integer_count - 4].
//   formats_of_choice            the four endpoint formats behind each error entry.
//   bits_available               column index into quantization_mode_table.
//   best_quantization_level      out: level for the winning integer count.
//   best_quantization_level_mod  out: level for the same integer count looked
//                                up with eight extra bits.
//                                NOTE(review): presumably the bits gained when
//                                all partitions share one format -- confirm.
//   best_formats                 out: four formats; FMT_LUMINANCE for all when
//                                the resulting level is negative.
//   error_of_best_combination    out: error of the winner; stays at 1e20f when
//                                nothing was usable.
//
// Only integer counts 4..9 are considered, although combined_best_error has
// room for 13 slots per level.
static void four_partitions_find_best_combination_for_bitcount(float combined_best_error[21][13],
	int formats_of_choice[21][13][4],
	int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
{
	int winning_count = 0;			// 0 doubles as "nothing usable found"
	float winning_error = 1e20f;

	for (int count = 4; count <= 9; count++)
	{
		// quantization level reachable with this many integers in this many bits
		const int level = quantization_mode_table[count][bits_available];

		// -1: not enough bits for this count; the scan stops at the first
		// such count instead of trying larger ones.
		if (level == -1)
			break;

		const float candidate = combined_best_error[level][count - 4];
		if (candidate < winning_error)
		{
			winning_error = candidate;
			winning_count = count;
		}
	}

	const int ql = quantization_mode_table[winning_count][bits_available];
	const int ql_mod = quantization_mode_table[winning_count][bits_available + 8];

	*best_quantization_level = ql;
	*best_quantization_level_mod = ql_mod;
	*error_of_best_combination = winning_error;

	for (int part = 0; part < 4; part++)
		best_formats[part] = (ql >= 0) ? formats_of_choice[ql][winning_count - 4][part] : FMT_LUMINANCE;
}
/*
The determine_optimal_set_of_endpoint_formats_to_use() function.
It identifies, for each mode, which set of color endpoint encodings
produces the best overall result. It then reports back which 4 modes
look best, along with the ideal color encoding combination for each.
It takes as input:
a partitioning an imageblock,
a set of color endpoints.
for each mode, the number of bits available for color encoding and the error incurred by quantization.
in case of 2 plane of weights, a specifier for which color component to use for the second plane of weights.
It delivers as output for each of the 4 selected modes:
format specifier
for each partition
quantization level to use
modified quantization level to use
(when all format specifiers are equal)
*/
void determine_optimal_set_of_endpoint_formats_to_use(int xdim, int ydim, int zdim,
const partition_info * pt, const imageblock * blk, const error_weight_block * ewb,
const endpoints * ep,
int separate_component, // separate color component for 2-plane mode; -1 for single-plane mode
// bitcounts and errors computed for the various quantization methods
const int *qwt_bitcounts, const float *qwt_errors,
// output data
int partition_format_specifiers[4][4], int quantized_weight[4],
int quantization_level[4], int quantization_level_mod[4])
{
int i, j;
int partition_count = pt->partition_count;
int encode_hdr_rgb = blk->rgb_lns[0];
int encode_hdr_alpha = blk->alpha_lns[0];
// call a helper function to compute the errors that result from various
// encoding choices (such as using luminance instead of RGB, discarding Alpha,
// using RGB-scale in place of two separate RGB endpoints and so on)
encoding_choice_errors eci[4];
compute_encoding_choice_errors(xdim, ydim, zdim, blk, pt, ewb, separate_component, eci);
// for each partition, compute the error weights to apply for that partition.
float4 error_weightings[4];
float4 dummied_color_scalefactors[4]; // only used to receive data
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, dummied_color_scalefactors);
float best_error[4][21][4];
int format_of_choice[4][21][4];
for (i = 0; i < partition_count; i++)
compute_color_error_for_every_integer_count_and_quantization_level(encode_hdr_rgb, encode_hdr_alpha, i, pt, &(eci[i]), ep, error_weightings, best_error[i], format_of_choice[i]);
float errors_of_best_combination[MAX_WEIGHT_MODES];
int best_quantization_levels[MAX_WEIGHT_MODES];
int best_quantization_levels_mod[MAX_WEIGHT_MODES];
int best_ep_formats[MAX_WEIGHT_MODES][4];
// code for the case where the block contains 1 partition
if (partition_count == 1)
{
int best_quantization_level;
int best_format;
float error_of_best_combination;
for (i = 0; i < MAX_WEIGHT_MODES; i++)
{
if (qwt_errors[i] >= 1e29f)
{
errors_of_best_combination[i] = 1e30f;
continue;
}
one_partition_find_best_combination_for_bitcount(best_error[0], format_of_choice[0], qwt_bitcounts[i], &best_quantization_level, &best_format, &error_of_best_combination);
error_of_best_combination += qwt_errors[i];
errors_of_best_combination[i] = error_of_best_combination;
best_quantization_levels[i] = best_quantization_level;
best_quantization_levels_mod[i] = best_quantization_level;
best_ep_formats[i][0] = best_format;
}
}
// code for the case where the block contains 2 partitions
else if (partition_count == 2)
{
int best_quantization_level;
int best_quantization_level_mod;
int best_formats[2];
float error_of_best_combination;
float combined_best_error[21][7];
int formats_of_choice[21][7][2];
two_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
for (i = 0; i < MAX_WEIGHT_MODES; i++)
{
if (qwt_errors[i] >= 1e29f)
{
errors_of_best_combination[i] = 1e30f;
continue;
}
two_partitions_find_best_combination_for_bitcount(combined_best_error, formats_of_choice, qwt_bitcounts[i],
&best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
error_of_best_combination += qwt_errors[i];
errors_of_best_combination[i] = error_of_best_combination;
best_quantization_levels[i] = best_quantization_level;
best_quantization_levels_mod[i] = best_quantization_level_mod;
best_ep_formats[i][0] = best_formats[0];
best_ep_formats[i][1] = best_formats[1];
}
}
// code for the case where the block contains 3 partitions
else if (partition_count == 3)
{
int best_quantization_level;
int best_quantization_level_mod;
int best_formats[3];
float error_of_best_combination;
float combined_best_error[21][10];
int formats_of_choice[21][10][3];
three_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
for (i = 0; i < MAX_WEIGHT_MODES; i++)
{
if (qwt_errors[i] >= 1e29f)
{
errors_of_best_combination[i] = 1e30f;
continue;
}
three_partitions_find_best_combination_for_bitcount(combined_best_error,
formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
error_of_best_combination += qwt_errors[i];
errors_of_best_combination[i] = error_of_best_combination;
best_quantization_levels[i] = best_quantization_level;
best_quantization_levels_mod[i] = best_quantization_level_mod;
best_ep_formats[i][0] = best_formats[0];
best_ep_formats[i][1] = best_formats[1];
best_ep_formats[i][2] = best_formats[2];
}
}
// code for the case where the block contains 4 partitions
else if (partition_count == 4)
{
int best_quantization_level;
int best_quantization_level_mod;
int best_formats[4];
float error_of_best_combination;
float combined_best_error[21][13];
int formats_of_choice[21][13][4];
four_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
for (i = 0; i < MAX_WEIGHT_MODES; i++)
{
if (qwt_errors[i] >= 1e29f)
{
errors_of_best_combination[i] = 1e30f;
continue;
}
four_partitions_find_best_combination_for_bitcount(combined_best_error,
formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
error_of_best_combination += qwt_errors[i];
errors_of_best_combination[i] = error_of_best_combination;
best_quantization_levels[i] = best_quantization_level;
best_quantization_levels_mod[i] = best_quantization_level_mod;
best_ep_formats[i][0] = best_formats[0];
best_ep_formats[i][1] = best_formats[1];
best_ep_formats[i][2] = best_formats[2];
best_ep_formats[i][3] = best_formats[3];
}
}
// finally, go through the results and pick the 4 best-looking modes.
int best_error_weights[4];
for (i = 0; i < 4; i++)
{
float best_ep_error = 1e30f;
int best_error_index = -1;
for (j = 0; j < MAX_WEIGHT_MODES; j++)
{
if (errors_of_best_combination[j] < best_ep_error && best_quantization_levels[j] >= 5)
{
best_ep_error = errors_of_best_combination[j];
best_error_index = j;
}
}
best_error_weights[i] = best_error_index;
if(best_error_index >= 0)
{
errors_of_best_combination[best_error_index] = 1e30f;
}
}
for (i = 0; i < 4; i++)
{
quantized_weight[i] = best_error_weights[i];
if (quantized_weight[i] >= 0)
{
quantization_level[i] = best_quantization_levels[best_error_weights[i]];
quantization_level_mod[i] = best_quantization_levels_mod[best_error_weights[i]];
for (j = 0; j < partition_count; j++)
{
partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j];
}
}
}
}

558
3rdparty/astc/astc_quantization.cpp vendored Normal file
View File

@@ -0,0 +1,558 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Functions and data table related to data quantization in ASTC.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
// Forward quantization lookup: color_quantization_tables[level][value] maps an
// 8-bit value (0..255) to the quantized index used at that level.
// There are 21 rows; row 0 has two possible outputs (0 for inputs below 128,
// 1 otherwise) and the last row is the identity mapping.
// The indices are not always monotonic in the input value: for several rows
// they follow the same scrambled ordering as the matching row of
// color_unquantization_tables, so that unquantizing an index yields the
// representable value for the original input.
// NOTE(review): the row index is presumably a quantization_method value
// (build_quantization_mode_table casts 0..20 to that enum) -- confirm.
const uint8_t color_quantization_tables[21][256] = {
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
},
{
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
},
{
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
},
{
0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
},
{
0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
},
{
0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
},
{
0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
},
{
0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
},
{
0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
},
{
0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
},
{
0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
},
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
},
};
// Reverse lookup for color_quantization_tables:
// color_unquantization_tables[level][index] gives the 8-bit value that a
// quantized index stands for at that level.
// Each row is declared 256 entries wide, but only the leading entries that are
// listed explicitly (2 for row 0, 3 for row 1, ... 256 for the last row) are
// meaningful; the unlisted tail of every row is zero-initialized.
const uint8_t color_unquantization_tables[21][256] = {
{
0, 255,
},
{
0, 128, 255,
},
{
0, 85, 170, 255,
},
{
0, 64, 128, 192, 255,
},
{
0, 255, 51, 204, 102, 153,
},
{
0, 36, 73, 109, 146, 182, 219, 255,
},
{
0, 255, 28, 227, 56, 199, 84, 171, 113, 142,
},
{
0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139,
},
{
0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255,
},
{
0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
54, 201, 121, 134,
},
{
0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
22, 233, 55, 200, 88, 167, 121, 134,
},
{
0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255,
},
{
0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
26, 229, 58, 197, 91, 164, 123, 132,
},
{
0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131,
},
{
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255,
},
{
0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130,
},
{
0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129,
},
{
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255,
},
{
0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129,
},
{
0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129,
},
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
},
};
// quantization_mode_table[integercount/2][bits] gives the largest
// quantization level for which compute_ise_bitcount(integercount, level)
// does not exceed the given number of bits, or -1 if no level fits.
// The table is filled in by build_quantization_mode_table(); until that
// function has run, every entry is zero.
// This is needed for color decoding, and for the color encoding.
int quantization_mode_table[17][128];
// Populates quantization_mode_table.
//
// Pass 1 marks every entry as "nothing fits" (-1).
// Pass 2 records, for every quantization level 0..20 and every pair count
// 1..16, the level at the exact bit cost reported by compute_ise_bitcount()
// for 2 * pair-count integers (costs of 128 bits or more are ignored).
// Pass 3 sweeps each row from low to high bit counts and carries the largest
// level seen so far forward, so each entry ends up holding the best level
// that fits in that many bits.
void build_quantization_mode_table(void)
{
	for (int pairs = 0; pairs <= 16; pairs++)
	{
		for (int bits = 0; bits < 128; bits++)
		{
			quantization_mode_table[pairs][bits] = -1;
		}
	}

	// Levels are visited in ascending order so that, when two levels have the
	// same cost, the higher level is the one that remains in the table.
	for (int level = 0; level < 21; level++)
	{
		for (int pairs = 1; pairs <= 16; pairs++)
		{
			const int cost = compute_ise_bitcount(2 * pairs, (quantization_method) level);
			if (cost >= 128)
			{
				continue;
			}
			quantization_mode_table[pairs][cost] = level;
		}
	}

	for (int pairs = 0; pairs <= 16; pairs++)
	{
		int best_level = -1;
		for (int bits = 0; bits < 128; bits++)
		{
			int &entry = quantization_mode_table[pairs][bits];
			if (entry > best_level)
			{
				best_level = entry;
			}
			entry = best_level;
		}
	}
}

431
3rdparty/astc/astc_symbolic_physical.cpp vendored Normal file
View File

@@ -0,0 +1,431 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Functions to convert a compressed block between the symbolic and
* the physical representation.
*/
/*----------------------------------------------------------------------------*/
#include "astc_codec_internals.h"
// Write the low `bitcount` bits of `value` into the byte buffer `ptr`, starting
// at bit position `bitoffset` (bit 0 is the least significant bit of ptr[0]).
// All other bits of the buffer are preserved.
//
// The field may straddle at most two bytes, i.e. (bitoffset & 7) + bitcount
// should not exceed 16; callers in this file pass fields of up to 11 bits.
// Bits that would land beyond the second byte are dropped.
//
// The second byte is only accessed when the field actually spills into it, so
// a field that ends inside the final byte of a buffer does not touch the byte
// past the end of that buffer.
static inline void write_bits(int value, int bitcount, int bitoffset, uint8_t * ptr)
{
	// Unsigned arithmetic keeps every shift below well-defined; the inverted
	// mask would otherwise be a negative int that gets shifted right.
	unsigned int mask = (1u << bitcount) - 1u;
	unsigned int bits = static_cast<unsigned int>(value) & mask;

	ptr += bitoffset >> 3;
	const int shift = bitoffset & 7;

	bits <<= shift;
	mask <<= shift;

	ptr[0] = static_cast<uint8_t>((ptr[0] & ~mask) | bits);
	if (shift + bitcount > 8)
	{
		ptr[1] = static_cast<uint8_t>((ptr[1] & ~(mask >> 8)) | (bits >> 8));
	}
}
// Read a `bitcount`-bit field from the byte buffer `ptr`, starting at bit
// position `bitoffset` (bit 0 is the least significant bit of ptr[0]), and
// return it as a non-negative integer.
//
// The field may straddle at most two bytes, i.e. (bitoffset & 7) + bitcount
// should not exceed 16; callers in this file read fields of up to 11 bits.
//
// The second byte is only read when the field actually extends into it, so a
// field that lies entirely within the final byte of a buffer does not cause a
// read past the end of that buffer.
static inline int read_bits(int bitcount, int bitoffset, const uint8_t * ptr)
{
	const int mask = (1 << bitcount) - 1;

	ptr += bitoffset >> 3;
	bitoffset &= 7;

	int value = ptr[0];
	if (bitoffset + bitcount > 8)
	{
		value |= ptr[1] << 8;
	}

	return (value >> bitoffset) & mask;
}
// Returns the low 8 bits of p in reversed bit order (bit 0 <-> bit 7,
// bit 1 <-> bit 6, ...). Bits above the low byte are ignored, so the result
// is always in the range 0..255.
int bitrev8(int p)
{
	int byte = p & 0xFF;
	// Swap neighbouring bits, then neighbouring bit pairs, then the nibbles.
	byte = ((byte & 0x55) << 1) | ((byte & 0xAA) >> 1);
	byte = ((byte & 0x33) << 2) | ((byte & 0xCC) >> 2);
	byte = ((byte & 0x0F) << 4) | ((byte & 0xF0) >> 4);
	return byte;
}
// Packs a symbolic block description into its 16-byte physical form.
//
// xdim, ydim, zdim  block dimensions; used only to look up the block-size
//                   descriptor (and through it the decimation tables and the
//                   block-mode properties).
// sc                symbolic block to pack.
//
// Returns the packed block by value.
//
// A block_mode of -2 or -1 selects a constant-color block (UNORM16 and FP16
// respectively); any other value is used as an index into the descriptor's
// block-mode table.
physical_compressed_block symbolic_to_physical(int xdim, int ydim, int zdim, const symbolic_compressed_block * sc)
{
	physical_compressed_block res;

	// Constant-color blocks. Every such block is emitted on its own; there is
	// currently no attempt to coalesce them into larger void-extents. The two
	// flavours differ only in the second header byte.
	if (sc->block_mode == -2 || sc->block_mode == -1)
	{
		static const uint8_t unorm16_header[8] = { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
		static const uint8_t fp16_header[8] = { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
		const uint8_t *header = (sc->block_mode == -2) ? unorm16_header : fp16_header;

		for (int b = 0; b < 8; b++)
		{
			res.data[b] = header[b];
		}
		// Four 16-bit channel values, low byte first, in bytes 8..15.
		for (int c = 0; c < 4; c++)
		{
			res.data[8 + 2 * c] = sc->constant_color[c] & 0xFF;
			res.data[9 + 2 * c] = (sc->constant_color[c] >> 8) & 0xFF;
		}
		return res;
	}

	const int partition_count = sc->partition_count;

	// Look up the properties of the selected block mode.
	const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
	const decimation_table *const *dec_tables = bsd->decimation_tables;

	const int weight_count = dec_tables[bsd->block_modes[sc->block_mode].decimation_mode]->num_weights;
	const int weight_quant = bsd->block_modes[sc->block_mode].quantization_mode;
	const int dual_plane = bsd->block_modes[sc->block_mode].is_dual_plane;
	const int real_weight_count = dual_plane ? 2 * weight_count : weight_count;
	const int bits_for_weights = compute_ise_bitcount(real_weight_count, (quantization_method) weight_quant);

	// Step 1: encode the weights as an ordinary integer sequence into a
	// zeroed scratch buffer. In dual-plane mode the two planes are interleaved
	// (plane 1, plane 2, plane 1, ...) before encoding.
	uint8_t weightbuf[16] = { 0 };
	if (dual_plane)
	{
		uint8_t interleaved[64];
		for (int w = 0; w < weight_count; w++)
		{
			interleaved[2 * w] = sc->plane1_weights[w];
			interleaved[2 * w + 1] = sc->plane2_weights[w];
		}
		encode_ise(weight_quant, real_weight_count, interleaved, weightbuf, 0);
	}
	else
	{
		encode_ise(weight_quant, weight_count, sc->plane1_weights, weightbuf, 0);
	}

	// Step 2: mirror the scratch buffer across the whole 128-bit block (both
	// byte order and bit order are reversed), so the weight bits end up at the
	// top of the block. This also initializes every byte of the result.
	for (int b = 0; b < 16; b++)
	{
		res.data[15 - b] = bitrev8(weightbuf[b]);
	}

	// Step 3: header fields. Block mode in bits 0..10, partition count minus
	// one in bits 11..12.
	write_bits(sc->block_mode, 11, 0, res.data);
	write_bits(partition_count - 1, 2, 11, res.data);

	// First bit position occupied by the weights; fields placed "below the
	// weights" are positioned relative to this.
	int below_weights_pos = 128 - bits_for_weights;

	if (partition_count <= 1)
	{
		// Single partition: only a 4-bit color endpoint format is stored.
		write_bits(sc->color_formats[0], 4, 13, res.data);
	}
	else
	{
		// Two or more partitions: store the partition index (split into a
		// 6-bit part and the remaining PARTITION_BITS - 6 bits) followed by
		// the color endpoint types.
		write_bits(sc->partition_index, 6, 13, res.data);
		write_bits(sc->partition_index >> 6, PARTITION_BITS - 6, 19, res.data);

		if (sc->color_formats_matched)
		{
			// All partitions share one format; the two low bits are zero.
			write_bits(sc->color_formats[0] << 2, 6, 13 + PARTITION_BITS, res.data);
		}
		else
		{
			// Find the lowest endpoint class used by any partition.
			int low_class = 4;
			for (int p = 0; p < partition_count; p++)
			{
				const int format_class = sc->color_formats[p] >> 2;
				if (format_class < low_class)
				{
					low_class = format_class;
				}
			}
			if (low_class == 3)
			{
				low_class = 2;
			}

			// Layout of encoded_type, from bit 0 upward:
			//   2 bits                : low_class + 1
			//   1 bit per partition   : class of that partition minus low_class
			//   2 bits per partition  : low two bits of that partition's format
			int encoded_type = low_class + 1;
			for (int p = 0; p < partition_count; p++)
			{
				const int class_bit = (sc->color_formats[p] >> 2) - low_class;
				encoded_type |= class_bit << (2 + p);
			}
			for (int p = 0; p < partition_count; p++)
			{
				const int low_bits = sc->color_formats[p] & 3;
				encoded_type |= low_bits << (2 + partition_count + 2 * p);
			}

			// The low 6 bits go right after the partition index; the rest is
			// stored immediately below the weights.
			const int highpart_size = (3 * partition_count) - 4;
			const int highpart_pos = below_weights_pos - highpart_size;
			write_bits(encoded_type & 0x3F, 6, 13 + PARTITION_BITS, res.data);
			write_bits(encoded_type >> 6, highpart_size, highpart_pos, res.data);
			below_weights_pos = highpart_pos;
		}
	}

	// Step 4: in dual-plane mode, the color component of the second weight
	// plane takes the two bits just below everything written so far.
	if (dual_plane)
	{
		write_bits(sc->plane2_color_component, 2, below_weights_pos - 2, res.data);
	}

	// Step 5: gather the color endpoint values of all partitions into one flat
	// list (each partition contributes 2 * class + 2 values) ...
	uint8_t values_to_encode[32];
	int valuecount_to_encode = 0;
	for (int p = 0; p < sc->partition_count; p++)
	{
		const int vals = 2 * (sc->color_formats[p] >> 2) + 2;
		for (int v = 0; v < vals; v++)
		{
			values_to_encode[valuecount_to_encode++] = sc->color_values[p][v];
		}
	}

	// ... and encode them as an integer sequence starting right after the
	// header fields.
	const int color_bits_pos = (sc->partition_count == 1) ? 17 : 19 + PARTITION_BITS;
	encode_ise(sc->color_quantization_level, valuecount_to_encode, values_to_encode, res.data, color_bits_pos);

	return res;
}
// Unpack one 128-bit physical block 'pb' into its symbolic form '*res' for a
// block footprint of xdim x ydim x zdim texels.
//
// Invalid encodings are reported by setting res->error_block to 1. Except for
// the void-extent path and the permit_decode check, which return immediately,
// the function keeps decoding after flagging an error.
void physical_to_symbolic(int xdim, int ydim, int zdim, physical_compressed_block pb, symbolic_compressed_block * res)
{
// byte- and bit-reversed copy of the block; the weights are decoded from it
uint8_t bswapped[16];
int i, j;
res->error_block = 0;
// get hold of the block-size descriptor and the decimation tables.
const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
const decimation_table *const *ixtab2 = bsd->decimation_tables;
// extract header fields
// the block mode occupies the lowest 11 bits of the block
int block_mode = read_bits(11, 0, pb.data);
if ((block_mode & 0x1FF) == 0x1FC)
{
// void-extent block!
// check what format the data has
if (block_mode & 0x200)
res->block_mode = -1; // floating-point
else
res->block_mode = -2; // unorm16.
res->partition_count = 0;
// the constant color is four 16-bit little-endian values held in bytes 8..15
for (i = 0; i < 4; i++)
{
res->constant_color[i] = pb.data[2 * i + 8] | (pb.data[2 * i + 9] << 8);
}
// additionally, check that the void-extent coordinates are well-formed
if (zdim == 1)
{
// 2D void-extent
// bits 10..11 must both be set
int rsvbits = read_bits(2, 10, pb.data);
if (rsvbits != 3)
res->error_block = 1;
// four 13-bit extent coordinates, each read as 8 low bits plus 5 high bits
int vx_low_s = read_bits(8, 12, pb.data) | (read_bits(5, 12 + 8, pb.data) << 8);
int vx_high_s = read_bits(8, 25, pb.data) | (read_bits(5, 25 + 8, pb.data) << 8);
int vx_low_t = read_bits(8, 38, pb.data) | (read_bits(5, 38 + 8, pb.data) << 8);
int vx_high_t = read_bits(8, 51, pb.data) | (read_bits(5, 51 + 8, pb.data) << 8);
// all coordinates being all-ones is accepted; otherwise each low
// coordinate must be strictly below its high counterpart
int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
res->error_block = 1;
}
else
{
// 3D void-extent
// six 9-bit extent coordinates packed back to back from bit 10
int vx_low_s = read_bits(9, 10, pb.data);
int vx_high_s = read_bits(9, 19, pb.data);
int vx_low_t = read_bits(9, 28, pb.data);
int vx_high_t = read_bits(9, 37, pb.data);
int vx_low_p = read_bits(9, 46, pb.data);
int vx_high_p = read_bits(9, 55, pb.data);
// same validity rule as in the 2D case, extended to the third axis
int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && vx_low_t == 0x1FF && vx_high_t == 0x1FF && vx_low_p == 0x1FF && vx_high_p == 0x1FF;
if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones)
res->error_block = 1;
}
return;
}
// block modes the descriptor does not permit decoding for are errors
if (bsd->block_modes[block_mode].permit_decode == 0)
{
res->error_block = 1;
return;
}
int weight_count = ixtab2[bsd->block_modes[block_mode].decimation_mode]->num_weights;
int weight_quantization_method = bsd->block_modes[block_mode].quantization_mode;
int is_dual_plane = bsd->block_modes[block_mode].is_dual_plane;
// with two weight planes, twice as many weights are stored
int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
// the partition count is stored minus one in bits 11..12
int partition_count = read_bits(2, 11, pb.data) + 1;
res->block_mode = block_mode;
res->partition_count = partition_count;
// reverse both the byte order and the bit order within each byte, so that
// the weight stream can be decoded starting at bit offset 0 of 'bswapped'
for (i = 0; i < 16; i++)
bswapped[i] = bitrev8(pb.data[15 - i]);
int bits_for_weights = compute_ise_bitcount(real_weight_count,
(quantization_method) weight_quantization_method);
// bit position in the unreversed block immediately below the weight bits
int below_weights_pos = 128 - bits_for_weights;
if (is_dual_plane)
{
// the two planes are interleaved: even entries are plane 1, odd are plane 2
uint8_t indices[64];
decode_ise(weight_quantization_method, real_weight_count, bswapped, indices, 0);
for (i = 0; i < weight_count; i++)
{
res->plane1_weights[i] = indices[2 * i];
res->plane2_weights[i] = indices[2 * i + 1];
}
}
else
{
decode_ise(weight_quantization_method, weight_count, bswapped, res->plane1_weights, 0);
}
// dual-plane mode combined with four partitions is flagged as an error
if (is_dual_plane && partition_count == 4)
res->error_block = 1;
res->color_formats_matched = 0;
// then, determine the format of each endpoint pair
int color_formats[4];
int encoded_type_highpart_size = 0;
if (partition_count == 1)
{
// single partition: one 4-bit format at bit 13, no partition index
color_formats[0] = read_bits(4, 13, pb.data);
res->partition_index = 0;
}
else
{
// multiple partitions: 6 bits of the encoded type sit after the partition
// index; the remaining (3 * partition_count - 4) bits sit just below the weights
encoded_type_highpart_size = (3 * partition_count) - 4;
below_weights_pos -= encoded_type_highpart_size;
int encoded_type = read_bits(6, 13 + PARTITION_BITS, pb.data) | (read_bits(encoded_type_highpart_size, below_weights_pos, pb.data) << 6);
int baseclass = encoded_type & 0x3;
if (baseclass == 0)
{
// all partitions share the single 4-bit format held in bits 2..5
for (i = 0; i < partition_count; i++)
{
color_formats[i] = (encoded_type >> 2) & 0xF;
}
// the high part is not present in this case; undo the adjustment made above
below_weights_pos += encoded_type_highpart_size;
res->color_formats_matched = 1;
encoded_type_highpart_size = 0;
}
else
{
// per-partition formats: one class-selector bit per partition (added to
// baseclass - 1), followed by two low format bits per partition
int bitpos = 2;
baseclass--;
for (i = 0; i < partition_count; i++)
{
color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
bitpos++;
}
for (i = 0; i < partition_count; i++)
{
color_formats[i] |= (encoded_type >> bitpos) & 3;
bitpos += 2;
}
}
// the partition index is PARTITION_BITS wide and starts at bit 13
res->partition_index = read_bits(6, 13, pb.data) | (read_bits(PARTITION_BITS - 6, 19, pb.data) << 6);
}
for (i = 0; i < partition_count; i++)
res->color_formats[i] = color_formats[i];
// then, determine the number of integers we need to unpack for the endpoint pairs
int color_integer_count = 0;
for (i = 0; i < partition_count; i++)
{
// the endpoint class is the format's upper bits; class c uses 2 * (c + 1) integers
int endpoint_class = color_formats[i] >> 2;
color_integer_count += (endpoint_class + 1) * 2;
}
// more than 18 color integers is flagged as an error
if (color_integer_count > 18)
res->error_block = 1;
// then, determine the color endpoint format to use for these integers
// bit budget per partition count, before subtracting the weight bits, the
// high part of the encoded type and the dual-plane selector
static const int color_bits_arr[5] = { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS };
int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
if (is_dual_plane)
color_bits -= 2;
// clamp so the table lookup below never sees a negative column
if (color_bits < 0)
color_bits = 0;
int color_quantization_level = quantization_mode_table[color_integer_count >> 1][color_bits];
res->color_quantization_level = color_quantization_level;
// quantization levels below 4 are flagged as an error
// NOTE(review): decode_ise() below is still called with this level even when
// the block has just been flagged - confirm decode_ise tolerates such values.
if (color_quantization_level < 4)
res->error_block = 1;
// then unpack the integer-bits
// color data starts at bit 17 for one partition, else at bit 19 + PARTITION_BITS
uint8_t values_to_decode[32];
decode_ise(color_quantization_level, color_integer_count, pb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS));
// and distribute them over the endpoint types
int valuecount_to_decode = 0;
for (i = 0; i < partition_count; i++)
{
int vals = 2 * (color_formats[i] >> 2) + 2;
for (j = 0; j < vals; j++)
res->color_values[i][j] = values_to_decode[j + valuecount_to_decode];
valuecount_to_decode += vals;
}
// get hold of color component for second-plane in the case of dual plane of weights.
// it is the 2-bit field directly below below_weights_pos
if (is_dual_plane)
res->plane2_color_component = read_bits(2, below_weights_pos - 2, pb.data);
}

598
3rdparty/astc/astc_weight_align.cpp vendored Normal file
View File

@@ -0,0 +1,598 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Angular-sum algorithm for weight alignment.
*
* This algorithm works as follows:
* * we compute a complex number P as (cos s*i, sin s*i) for each
* weight, where i is the input value and s is a scaling factor
* based on the spacing between the weights.
* * we then add together complex numbers for all the weights.
* * we then compute the length and angle of the resulting sum.
*
* This should produce the following results:
* * perfect alignment results in a vector whose length is equal to
* the sum of lengths of all inputs
* * even distribution results in a vector of length 0.
* * all samples identical results in perfect alignment for every
* scaling.
*
* For each scaling factor within a given set, we compute an alignment
* factor from 0 to 1. This should then result in some scalings standing
* out as having particularly good alignment factors; we can use this to
* produce a set of candidate scale/shift values for various quantization
* levels; we should then actually try them and see what happens.
*
* Assuming N quantization steps, the scaling factor becomes s=2*PI*(N-1);
* we should probably have about 1 scaling factor for every 1/4
* quantization step (perhaps 1/8 for low levels of quantization)
*/
/*----------------------------------------------------------------------------*/
#include <math.h>
#include "astc_codec_internals.h"
#ifdef DEBUG_PRINT_DIAGNOSTICS
#include <stdio.h>
#endif
// Candidate angular steppings, in increasing order: increments of 0.125 from
// 1.0 up to 2.0, of 0.25 from 2.0 up to 8.0, and of 0.5 from 8.0 up to 35.5.
// Each value is used directly as a reciprocal step size; its inverse is
// cached in stepsizes[].
static const float angular_steppings[] = {
1.0, 1.125,
1.25, 1.375,
1.5, 1.625,
1.75, 1.875,
2.0, 2.25, 2.5, 2.75,
3.0, 3.25, 3.5, 3.75,
4.0, 4.25, 4.5, 4.75,
5.0, 5.25, 5.5, 5.75,
6.0, 6.25, 6.5, 6.75,
7.0, 7.25, 7.5, 7.75,
8.0, 8.5,
9.0, 9.5,
10.0, 10.5,
11.0, 11.5,
12.0, 12.5,
13.0, 13.5,
14.0, 14.5,
15.0, 15.5,
16.0, 16.5,
17.0, 17.5,
18.0, 18.5,
19.0, 19.5,
20.0, 20.5,
21.0, 21.5,
22.0, 22.5,
23.0, 23.5,
24.0, 24.5,
25.0, 25.5,
26.0, 26.5,
27.0, 27.5,
28.0, 28.5,
29.0, 29.5,
30.0, 30.5,
31.0, 31.5,
32.0, 32.5,
33.0, 33.5,
34.0, 34.5,
35.0, 35.5,
};
// number of entries in angular_steppings[]
#define ANGULAR_STEPS ((int)(sizeof(angular_steppings)/sizeof(angular_steppings[0])))
// 1 / angular_steppings[i], and the square of that value; both are filled in
// by prepare_angular_tables()
static float stepsizes[ANGULAR_STEPS];
static float stepsizes_sqr[ANGULAR_STEPS];
// for each of the 13 quantization levels, how many leading entries of
// angular_steppings[] need to be examined; filled in by prepare_angular_tables()
static int max_angular_steps_needed_for_quant_level[13];
// we store sine/cosine values for 64 possible weight values; this causes
// slight quality loss compared to using sin() and cos() directly.
#define SINCOS_STEPS 64
// both tables are indexed as [quantized sample value][angular step]
static float sin_table[SINCOS_STEPS][ANGULAR_STEPS];
static float cos_table[SINCOS_STEPS][ANGULAR_STEPS];
void prepare_angular_tables(void)
{
int i, j;
int max_angular_steps_needed_for_quant_steps[40];
for (i = 0; i < ANGULAR_STEPS; i++)
{
stepsizes[i] = 1.0f / angular_steppings[i];
stepsizes_sqr[i] = stepsizes[i] * stepsizes[i];
for (j = 0; j < SINCOS_STEPS; j++)
{
sin_table[j][i] = static_cast < float >(sin((2.0f * M_PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j));
cos_table[j][i] = static_cast < float >(cos((2.0f * M_PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j));
}
int p = static_cast < int >(floor(angular_steppings[i])) + 1;
max_angular_steps_needed_for_quant_steps[p] = MIN(i + 1, ANGULAR_STEPS - 1);
}
// yes, the next-to-last entry is supposed to have the value 33. This because under
// ASTC, the 32-weight mode leaves a double-sized hole in the middle of the
// weight space, so we are better off matching 33 weights than 32.
static const int steps_of_level[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 };
for (i = 0; i < 13; i++)
max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[steps_of_level[i]];
}
// Overlay of a 32-bit float with signed and unsigned 32-bit integers; the
// functions below use it to inspect the bit pattern of a float value.
union if32
{
float f;
int32_t s;
uint32_t u;
};
// function to compute angular sums; then, from the
// angular sums, compute alignment factor and offset.
/* static inline */
void compute_angular_offsets(int samplecount, const float *samples, const float *sample_weights, int max_angular_steps, float *offsets)
{
int i, j;
float anglesum_x[ANGULAR_STEPS];
float anglesum_y[ANGULAR_STEPS];
for (i = 0; i < max_angular_steps; i++)
{
anglesum_x[i] = 0;
anglesum_y[i] = 0;
}
// compute the angle-sums.
for (i = 0; i < samplecount; i++)
{
float sample = samples[i];
float sample_weight = sample_weights[i];
if32 p;
p.f = (sample * (SINCOS_STEPS - 1.0f)) + 12582912.0f;
unsigned int isample = p.u & 0x3F;
const float *sinptr = sin_table[isample];
const float *cosptr = cos_table[isample];
for (j = 0; j < max_angular_steps; j++)
{
float cp = cosptr[j];
float sp = sinptr[j];
anglesum_x[j] += cp * sample_weight;
anglesum_y[j] += sp * sample_weight;
}
}
// post-process the angle-sums
for (i = 0; i < max_angular_steps; i++)
{
float angle = atan2(anglesum_y[i], anglesum_x[i]); // positive angle -> positive offset
offsets[i] = angle * (stepsizes[i] * (1.0f / (2.0f * (float)M_PI)));
}
}
// for a given step-size and a given offset, compute the
// lowest and highest weight that results from quantizing using the stepsize & offset.
// also, compute the resulting error.
/* static inline */
// For each of the first 'max_angular_steps' steppings, quantize every sample
// with that stepping and the matching entry of offsets[], and report:
//   lowest_weight[sp], highest_weight[sp]  smallest / largest quantized index seen
//   error[sp]                              weighted sum of squared rounding residuals
//   cut_low_weight_error[sp]               extra error from moving the samples at the
//                                          lowest index one step up
//   cut_high_weight_error[sp]              extra error from moving the samples at the
//                                          highest index one step down
// All three error outputs are scaled by stepsizes_sqr[sp] before returning.
void compute_lowest_and_highest_weight(int samplecount, const float *samples, const float *sample_weights,
int max_angular_steps, const float *offsets,
int8_t * lowest_weight, int8_t * highest_weight,
float *error, float *cut_low_weight_error, float *cut_high_weight_error)
{
int i;
int sp;
// per-index accumulators, addressed by (quantized index + 12); they are
// zeroed here once and then selectively re-zeroed at the end of every pass
float error_from_forcing_weight_down[60];
float error_from_forcing_weight_either_way[60];
for (i = 0; i < 60; i++)
{
error_from_forcing_weight_down[i] = 0;
error_from_forcing_weight_either_way[i] = 0;
}
// weight + 12
// The table is addressed with the low 8 bits of the float bit pattern. Its two
// 128-entry halves are identical, so only the low 7 bits matter: they are read
// as a signed value, clamped to the range -12..43 and biased by 12, giving 0..55.
static const unsigned int idxtab[256] = {
12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11
};
for (sp = 0; sp < max_angular_steps; sp++)
{
// start with an inverted range so that the first sample sets both bounds
unsigned int minidx_bias12 = 55;
unsigned int maxidx_bias12 = 0;
float errval = 0.0f;
// angular_steppings[] holds reciprocal step sizes
float rcp_stepsize = angular_steppings[sp];
float offset = offsets[sp];
float scaled_offset = rcp_stepsize * offset;
// main loop: two samples per iteration; an odd trailing sample is handled below
for (i = 0; i < samplecount - 1; i += 2)
{
float wt1 = sample_weights[i];
float wt2 = sample_weights[i + 1];
if32 p1, p2;
float sval1 = (samples[i] * rcp_stepsize) - scaled_offset;
float sval2 = (samples[i + 1] * rcp_stepsize) - scaled_offset;
// 12582912.0f is 1.5 * 2^23: after the addition the value rounded to an
// integer sits in the low mantissa bits, and subtracting the constant
// again yields that rounded value as a float
p1.f = sval1 + 12582912.0f; // FP representation abuse to avoid floor() and float->int conversion
p2.f = sval2 + 12582912.0f; // FP representation abuse to avoid floor() and float->int conversion
float isval1 = p1.f - 12582912.0f;
float isval2 = p2.f - 12582912.0f;
// rounding residuals
float dif1 = sval1 - isval1;
float dif2 = sval2 - isval2;
errval += (dif1 * wt1) * dif1;
errval += (dif2 * wt2) * dif2;
// table lookups that really perform a minmax function.
unsigned int idx1_bias12 = idxtab[p1.u & 0xFF];
unsigned int idx2_bias12 = idxtab[p2.u & 0xFF];
if (idx1_bias12 < minidx_bias12)
minidx_bias12 = idx1_bias12;
if (idx1_bias12 > maxidx_bias12)
maxidx_bias12 = idx1_bias12;
if (idx2_bias12 < minidx_bias12)
minidx_bias12 = idx2_bias12;
if (idx2_bias12 > maxidx_bias12)
maxidx_bias12 = idx2_bias12;
// per-index totals of the weights and of the weighted residuals
error_from_forcing_weight_either_way[idx1_bias12] += wt1;
error_from_forcing_weight_down[idx1_bias12] += (dif1 * wt1);
error_from_forcing_weight_either_way[idx2_bias12] += wt2;
error_from_forcing_weight_down[idx2_bias12] += (dif2 * wt2);
}
// odd sample count: process the final sample on its own
if (samplecount & 1)
{
i = samplecount - 1;
float wt = sample_weights[i];
if32 p;
float sval = (samples[i] * rcp_stepsize) - scaled_offset;
p.f = sval + 12582912.0f; // FP representation abuse to avoid floor() and float->int conversion
float isval = p.f - 12582912.0f;
float dif = sval - isval;
errval += (dif * wt) * dif;
unsigned int idx_bias12 = idxtab[p.u & 0xFF];
if (idx_bias12 < minidx_bias12)
minidx_bias12 = idx_bias12;
if (idx_bias12 > maxidx_bias12)
maxidx_bias12 = idx_bias12;
error_from_forcing_weight_either_way[idx_bias12] += wt;
error_from_forcing_weight_down[idx_bias12] += dif * wt;
}
// remove the +12 bias again for the caller
lowest_weight[sp] = (int)minidx_bias12 - 12;
highest_weight[sp] = (int)maxidx_bias12 - 12;
error[sp] = errval;
// the cut_(lowest/highest)_weight_error indicate the error that results from
// forcing samples that should have had the (lowest/highest) weight value
// one step (up/down).
cut_low_weight_error[sp] = error_from_forcing_weight_either_way[minidx_bias12] - 2.0f * error_from_forcing_weight_down[minidx_bias12];
cut_high_weight_error[sp] = error_from_forcing_weight_either_way[maxidx_bias12] + 2.0f * error_from_forcing_weight_down[maxidx_bias12];
// clear out the error-from-forcing values we actually used in this pass
// so that these are clean for the next pass.
// the start index is rounded down to a multiple of 4 and four entries are
// cleared per iteration; the largest index touched is 55, within the 60 entries
unsigned int ui;
for (ui = minidx_bias12 & ~0x3; ui <= maxidx_bias12; ui += 4)
{
error_from_forcing_weight_either_way[ui] = 0;
error_from_forcing_weight_down[ui] = 0;
error_from_forcing_weight_either_way[ui + 1] = 0;
error_from_forcing_weight_down[ui + 1] = 0;
error_from_forcing_weight_either_way[ui + 2] = 0;
error_from_forcing_weight_down[ui + 2] = 0;
error_from_forcing_weight_either_way[ui + 3] = 0;
error_from_forcing_weight_down[ui + 3] = 0;
}
}
// the errors above were accumulated in units of quantization steps; scale them
// by the squared step size
for (sp = 0; sp < max_angular_steps; sp++)
{
float errscale = stepsizes_sqr[sp];
error[sp] *= errscale;
cut_low_weight_error[sp] *= errscale;
cut_high_weight_error[sp] *= errscale;
}
}
// main function for running the angular algorithm.
// Run the angular algorithm on one set of samples and, for every quantization
// level from 0 through max_quantization_level, write the chosen low and high
// endpoint values to low_value[level] and high_value[level].
//
// The search itself is carried out one quantization level above the requested
// maximum. If no candidate exists for a requested level, an error message is
// printed and the process exits with status 1.
void compute_angular_endpoints_for_quantization_levels(int samplecount, const float *samples, const float *sample_weights, int max_quantization_level, float low_value[12], float high_value[12])
{
	static const int quantization_steps_for_level[13] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 };
	static const int ql_weights[12] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33 };

	// Temporarily search one level higher - needs refinement
	const int search_level = max_quantization_level + 1;
	const int max_quantization_steps = quantization_steps_for_level[search_level];
	const int max_angular_steps = max_angular_steps_needed_for_quant_level[search_level];

	float offsets[ANGULAR_STEPS];
	compute_angular_offsets(samplecount, samples, sample_weights, max_angular_steps, offsets);

	// the +4 offsets are to allow for vectorization within compute_lowest_and_highest_weight().
	int8_t lowest_weight[ANGULAR_STEPS + 4];
	int8_t highest_weight[ANGULAR_STEPS + 4];
	float error[ANGULAR_STEPS + 4];
	float cut_low_weight_error[ANGULAR_STEPS + 4];
	float cut_high_weight_error[ANGULAR_STEPS + 4];

	compute_lowest_and_highest_weight(samplecount, samples, sample_weights, max_angular_steps, offsets, lowest_weight, highest_weight, error, cut_low_weight_error, cut_high_weight_error);

#ifdef DEBUG_PRINT_DIAGNOSTICS
	if (print_diagnostics)
	{
		printf("%s : max-angular-steps=%d \n", __func__, max_angular_steps);
		printf("Samplecount=%d, max_quantization_level=%d\n", samplecount, search_level);
		for (int s = 0; s < samplecount; s++)
			printf("Sample %d : %f (weight %f)\n", s, samples[s], sample_weights[s]);
		for (int step = 0; step < max_angular_steps; step++)
		{
			printf("%d: offset=%f error=%f lowest=%d highest=%d cl=%f ch=%f\n", step, offsets[step], error[step], lowest_weight[step], highest_weight[step], cut_low_weight_error[step], cut_high_weight_error[step]);
		}
		printf("\n");
	}
#endif

	// for each weight count, the best candidate found so far.
	float best_errors[40];
	int best_scale[40];
	uint8_t cut_low_weight[40];

	const int slot_count = max_quantization_steps + 4;
	for (int slot = 0; slot < slot_count; slot++)
	{
		best_errors[slot] = 1e30f;
		best_scale[slot] = -1;	// Indicates no solution found
		cut_low_weight[slot] = 0;
	}

	for (int step = 0; step < max_angular_steps; step++)
	{
		// number of distinct weight values this stepping spans
		int span = highest_weight[step] - lowest_weight[step] + 1;
		if (span >= slot_count)
			continue;
		if (span < 2)
			span = 2;

		const float base_error = error[step];

		// candidate using the full span
		if (best_errors[span] > base_error)
		{
			best_errors[span] = base_error;
			best_scale[span] = step;
			cut_low_weight[span] = 0;
		}

		const float error_cut_low = base_error + cut_low_weight_error[step];
		const float error_cut_high = base_error + cut_high_weight_error[step];
		const float error_cut_low_high = base_error + cut_low_weight_error[step] + cut_high_weight_error[step];

		// candidates one weight shorter: drop the lowest weight, then the highest
		if (best_errors[span - 1] > error_cut_low)
		{
			best_errors[span - 1] = error_cut_low;
			best_scale[span - 1] = step;
			cut_low_weight[span - 1] = 1;
		}
		if (best_errors[span - 1] > error_cut_high)
		{
			best_errors[span - 1] = error_cut_high;
			best_scale[span - 1] = step;
			cut_low_weight[span - 1] = 0;
		}

		// candidate two weights shorter: drop both ends
		if (best_errors[span - 2] > error_cut_low_high)
		{
			best_errors[span - 2] = error_cut_low_high;
			best_scale[span - 2] = step;
			cut_low_weight[span - 2] = 1;
		}
	}

	// if we got a better error-value for a low sample count than for a high one,
	// use the low sample count error value for the higher sample count as well.
	for (int count = 3; count <= max_quantization_steps; count++)
	{
		if (best_errors[count] > best_errors[count - 1])
		{
			best_errors[count] = best_errors[count - 1];
			best_scale[count] = best_scale[count - 1];
			cut_low_weight[count] = cut_low_weight[count - 1];
		}
	}

	// results are reported only for the levels the caller asked for
	for (int level = 0; level <= max_quantization_level; level++)
	{
		const int q = ql_weights[level];
		const int bsi = best_scale[q];

		// Did we find anything?
		if (bsi < 0)
		{
			printf("ERROR: Unable to find an encoding within the specified error limits. Please revise the error limit values and try again.\n");
			exit(1);
		}

		const float stepsize = stepsizes[bsi];
		const float offset = offsets[bsi];
		const int lwi = lowest_weight[bsi] + cut_low_weight[q];
		const int hwi = lwi + q - 1;

		low_value[level] = offset + lwi * stepsize;
		high_value[level] = offset + hwi * stepsize;
	}
}
// helper functions that will compute ideal angular-endpoints
// for a given set of weights and a given block size descriptors
// Single-plane driver: run the angular algorithm once per usable decimation
// mode, then copy the result that matches each single-plane block mode's
// (decimation mode, quantization mode) pair into low_value[] / high_value[].
// Entries for block modes that are dual-plane or above mode_cutoff are left
// untouched.
void compute_angular_endpoints_1plane(float mode_cutoff, const block_size_descriptor * bsd,
									  const float *decimated_quantized_weights, const float *decimated_weights,
									  float low_value[MAX_WEIGHT_MODES], float high_value[MAX_WEIGHT_MODES])
{
	float low_values[MAX_DECIMATION_MODES][12];
	float high_values[MAX_DECIMATION_MODES][12];

	for (int dm = 0; dm < MAX_DECIMATION_MODES; dm++)
	{
		const int samplecount = bsd->decimation_mode_samples[dm];
		const int quant_mode = bsd->decimation_mode_maxprec_1plane[dm];
		const float percentile = bsd->decimation_mode_percentile[dm];
		const int permit_encode = bsd->permit_encode[dm];

		const bool usable = permit_encode != 0 && samplecount >= 1 && quant_mode >= 0 && !(percentile > mode_cutoff);
		if (usable)
		{
			// each decimation mode owns one MAX_WEIGHTS_PER_BLOCK-sized slice of the inputs
			const int base = dm * MAX_WEIGHTS_PER_BLOCK;
			compute_angular_endpoints_for_quantization_levels(samplecount,
															  decimated_quantized_weights + base,
															  decimated_weights + base, quant_mode, low_values[dm], high_values[dm]);
		}
	}

	for (int mode = 0; mode < MAX_WEIGHT_MODES; mode++)
	{
		const bool wanted = bsd->block_modes[mode].is_dual_plane == 0 && !(bsd->block_modes[mode].percentile > mode_cutoff);
		if (wanted)
		{
			const int quant_mode = bsd->block_modes[mode].quantization_mode;
			const int decim_mode = bsd->block_modes[mode].decimation_mode;
			low_value[mode] = low_values[decim_mode][quant_mode];
			high_value[mode] = high_values[decim_mode][quant_mode];
		}
	}
}
// Dual-plane driver: for every usable decimation mode, run the angular
// algorithm separately on the two weight planes, then copy the results that
// match each dual-plane block mode's (decimation mode, quantization mode) pair
// into the four output arrays. Entries for block modes that are not dual-plane
// or are above mode_cutoff are left untouched.
void compute_angular_endpoints_2planes(float mode_cutoff,
									   const block_size_descriptor * bsd,
									   const float *decimated_quantized_weights,
									   const float *decimated_weights,
									   float low_value1[MAX_WEIGHT_MODES], float high_value1[MAX_WEIGHT_MODES], float low_value2[MAX_WEIGHT_MODES], float high_value2[MAX_WEIGHT_MODES])
{
	float low_values1[MAX_DECIMATION_MODES][12];
	float high_values1[MAX_DECIMATION_MODES][12];
	float low_values2[MAX_DECIMATION_MODES][12];
	float high_values2[MAX_DECIMATION_MODES][12];

	for (int dm = 0; dm < MAX_DECIMATION_MODES; dm++)
	{
		const int samplecount = bsd->decimation_mode_samples[dm];
		const int quant_mode = bsd->decimation_mode_maxprec_2planes[dm];
		const float percentile = bsd->decimation_mode_percentile[dm];
		const int permit_encode = bsd->permit_encode[dm];

		const bool usable = permit_encode != 0 && samplecount >= 1 && quant_mode >= 0 && !(percentile > mode_cutoff);
		if (usable)
		{
			// each decimation mode owns two consecutive slices: plane 1, then plane 2
			const int plane1_base = 2 * dm * MAX_WEIGHTS_PER_BLOCK;
			const int plane2_base = (2 * dm + 1) * MAX_WEIGHTS_PER_BLOCK;

			compute_angular_endpoints_for_quantization_levels(samplecount,
															  decimated_quantized_weights + plane1_base,
															  decimated_weights + plane1_base, quant_mode, low_values1[dm], high_values1[dm]);
			compute_angular_endpoints_for_quantization_levels(samplecount,
															  decimated_quantized_weights + plane2_base,
															  decimated_weights + plane2_base, quant_mode, low_values2[dm], high_values2[dm]);
		}
	}

	for (int mode = 0; mode < MAX_WEIGHT_MODES; mode++)
	{
		const bool wanted = bsd->block_modes[mode].is_dual_plane == 1 && !(bsd->block_modes[mode].percentile > mode_cutoff);
		if (wanted)
		{
			const int quant_mode = bsd->block_modes[mode].quantization_mode;
			const int decim_mode = bsd->block_modes[mode].decimation_mode;
			low_value1[mode] = low_values1[decim_mode][quant_mode];
			high_value1[mode] = high_values1[decim_mode][quant_mode];
			low_value2[mode] = low_values2[decim_mode][quant_mode];
			high_value2[mode] = high_values2[decim_mode][quant_mode];
		}
	}
}

File diff suppressed because it is too large Load Diff

137
3rdparty/astc/license.txt vendored Normal file
View File

@@ -0,0 +1,137 @@
END USER LICENCE AGREEMENT FOR THE MALI ASTC SPECIFICATION AND SOFTWARE CODEC,
VERSION: 1.3
THIS END USER LICENCE AGREEMENT ("LICENCE") IS A LEGAL AGREEMENT BETWEEN YOU
(EITHER A SINGLE INDIVIDUAL, OR SINGLE LEGAL ENTITY) AND ARM LIMITED ("ARM")
FOR THE USE OF THE SOFTWARE ACCOMPANYING THIS LICENCE. ARM IS ONLY WILLING
TO LICENSE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS
IN THIS LICENCE. BY CLICKING "I AGREE" OR BY INSTALLING OR OTHERWISE USING
OR COPYING THE SOFTWARE YOU INDICATE THAT YOU AGREE TO BE BOUND BY ALL THE
TERMS OF THIS LICENCE.
IF YOU DO NOT AGREE TO THE TERMS OF THIS LICENCE, ARM IS UNWILLING TO LICENSE
THE SOFTWARE TO YOU AND YOU MAY NOT INSTALL, USE OR COPY THE SOFTWARE.
1. DEFINITIONS.
"Authorised Purpose" means the use of the Software solely to develop products
and tools which implement the Khronos ASTC specification to;
(i) compress texture images into ASTC format ("Compression Results");
(ii) distribute such Compression Results to third parties; and
(iii) decompress texture images stored in ASTC format.
"Software" means the source code and Software binaries accompanying this
Licence, and any printed, electronic or online documentation supplied with it,
in all cases relating to the MALI ASTC SPECIFICATION AND SOFTWARE CODEC.
2. LICENCE GRANT.
ARM hereby grants to you, subject to the terms and conditions of this Licence,
a nonexclusive, nontransferable, free of charge, royalty free, worldwide
licence to use, copy, modify and (subject to Clause 3 below) distribute the
Software solely for the Authorised Purpose.
No right is granted to use the Software to develop hardware.
Notwithstanding the foregoing, nothing in this Licence prevents you from
using the Software to develop products that conform to an application
programming interface specification issued by The Khronos Group Inc.
("Khronos"), provided that you have licences to develop such products
under the relevant Khronos agreements.
3. RESTRICTIONS ON USE OF THE SOFTWARE.
RESTRICTIONS ON TRANSFER OF LICENSED RIGHTS: The rights granted to you under
this Licence may not be assigned by you to any third party without the prior
written consent of ARM.
TITLE AND RESERVATION OF RIGHTS: You acquire no rights to the Software other
than as expressly provided by this Licence. The Software is licensed not sold.
ARM does not transfer title to the Software to you. In no event shall the
licences granted in Clause 2 be construed as granting you expressly or by
implication, estoppel or otherwise, licences to any ARM technology other than
the Software.
NOTICES: You shall not remove from the Software any copyright notice or other
notice (whether ARM's or its licensor's), and you shall ensure that any such
notice is reproduced in any copies of the whole or any part of the Software
made by you. You shall not use ARM's or its licensor's name, logo or
trademarks to market Compression Results. If you distribute the Software to a
third party, you agree to include a copy of this Licence with such
distribution.
4. NO SUPPORT.
ARM has no obligation to support or to continue providing or updating any of
the Software.
5. NO WARRANTIES.
YOU AGREE THAT THE SOFTWARE IS LICENSED "AS IS", AND THAT ARM EXPRESSLY
DISCLAIMS ALL REPRESENTATIONS, WARRANTIES, CONDITIONS OR OTHER TERMS, EXPRESS,
IMPLIED OR STATUTORY, TO THE FULLEST EXTENT PERMITTED BY LAW. YOU EXPRESSLY
ASSUME ALL LIABILITIES AND RISKS, FOR USE OR OPERATION OF ANY APPLICATION
PROGRAMS YOU CREATE WITH THE SOFTWARE, AND YOU ASSUME THE ENTIRE COST OF ALL
NECESSARY SERVICING, REPAIR OR CORRECTION.
6. LIMITATION OF LIABILITY.
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL ARM BE
LIABLE FOR ANY INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
(INCLUDING LOSS OF PROFITS) ARISING OUT OF THE USE OR INABILITY TO USE THE
SOFTWARE WHETHER BASED ON A CLAIM UNDER CONTRACT, TORT OR OTHER LEGAL THEORY,
EVEN IF ARM WAS ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
ARM does not seek to limit or exclude liability for death or personal injury
arising from ARM's negligence and because some jurisdictions do not permit the
exclusion or limitation of liability for consequential or incidental damages
the above limitation relating to liability for consequential damages may not
apply to you.
NOTWITHSTANDING ANYTHING TO THE CONTRARY CONTAINED IN THIS LICENCE, THE
MAXIMUM LIABILITY OF ARM TO YOU IN AGGREGATE FOR ALL CLAIMS MADE AGAINST ARM
IN CONTRACT TORT OR OTHERWISE UNDER OR IN CONNECTION WITH THE SUBJECT MATTER
OF THIS LICENCE SHALL NOT EXCEED THE GREATER OF THE TOTAL OF SUMS PAID BY YOU
TO ARM (IF ANY) FOR THIS LICENCE AND US$5.00.
7. U.S. GOVERNMENT END USERS.
US Government Restrictions: Use, duplication, reproduction, release,
modification, disclosure or transfer of this commercial product and
accompanying documentation is restricted in accordance with the terms
of this Licence.
8. TERM AND TERMINATION.
This Licence shall remain in force until terminated by you or by ARM. Without
prejudice to any of its other rights if you are in breach of any of the terms
and conditions of this Licence then ARM may terminate this Licence immediately
upon giving written notice to you. You may terminate this Licence at any time.
Upon termination of this Licence by you or by ARM you shall stop using the
Software and destroy all copies of the Software in your possession together
with all documentation and related materials. The provisions of Clauses 1, 3,
4, 5, 6, 7, 8 and 9 shall survive termination of this Licence.
9. GENERAL.
This Licence is governed by English Law. Except where ARM agrees otherwise in
a written contract signed by you and ARM, this is the only agreement between
you and ARM relating to the Software and it may only be modified by written
agreement between you and ARM. Except as expressly agreed in writing, this
Licence may not be modified by purchase orders, advertising or other
representation by any person. If any clause in this Licence is held by a court
of law to be illegal or unenforceable the remaining provisions of this Licence
shall not be affected thereby. The failure by ARM to enforce any of the
provisions of this Licence, unless waived in writing, shall not constitute a
waiver of ARM's rights to enforce such provision or any other provision of
this Licence in the future.
You agree to comply fully with all laws and regulations of the United States
and other countries ("Export Laws") to assure that the Software is not;
(1) exported, directly or indirectly, in violation of Export Laws, either to
any countries that are subject to U.S.A. export restrictions or to any end
user who has been prohibited from participating in the U.S.A. export
transactions by any federal agency of the U.S.A. government; or
(2) intended to be used for any purpose prohibited by Export Laws, including,
without limitation, nuclear, chemical, or biological weapons proliferation.

772
3rdparty/astc/mathlib.cpp vendored Normal file
View File

@@ -0,0 +1,772 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Library of math functions.
*/
/*----------------------------------------------------------------------------*/
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "mathlib.h"
/**************************
basic OpenCL functions
**************************/
// Reciprocal square root: returns 1 / sqrt(p).
float inversesqrt(float p)
{
	float reciprocal = 1.0f / sqrt(p);
	return reciprocal;
}
// Arc cosine expressed in units of pi: returns acos(p) / pi, so that an input
// of 1 gives 0 and an input of -1 gives 1.
float acospi(float p)
{
	return static_cast < float >(acos(p) * (1.0f / M_PI));
}
// Sine of an angle given in units of pi: returns sin(p * pi).
float sinpi(float p)
{
	const double radians = p * M_PI;
	return static_cast < float >(sin(radians));
}
// Cosine of an angle given in units of pi: returns cos(p * pi).
float cospi(float p)
{
	const double radians = p * M_PI;
	return static_cast < float >(cos(radians));
}
// Build a float whose bit pattern is 'p' OR-ed with 0x7FC00000 (all exponent
// bits and the top mantissa bit set), i.e. a NaN carrying the bits of 'p'.
float nan(int p)
{
	union
	{
		int bits;
		float value;
	} pun;

	pun.bits = p | 0x7FC00000U;
	return pun.value;
}
// Scalar fmax/fmin fallbacks, compiled only for non-MSVC, pre-C++11 builds.
// If exactly one operand is NaN the other operand is returned.
// NOTE(review): mathlib.h guards the matching declarations with only
// '__cplusplus < 201103L' -- confirm the narrower guard here is intended.
#if (!_MSC_VER) && (__cplusplus < 201103L)
float fmax(float p, float q)
{
	// A NaN never compares equal to itself.
	const bool p_is_nan = (p != p);
	const bool q_is_nan = (q != q);
	if (p_is_nan)
		return q;
	if (q_is_nan)
		return p;
	return (p > q) ? p : q;
}
float fmin(float p, float q)
{
	// A NaN never compares equal to itself.
	const bool p_is_nan = (p != p);
	const bool q_is_nan = (q != q);
	if (p_is_nan)
		return q;
	if (q_is_nan)
		return p;
	return (p < q) ? p : q;
}
#endif // C++11
// Component-wise maximum of two vectors (2, 3 and 4 components).
float2 fmax(float2 p, float2 q)
{
	float hi_x = fmax(p.x, q.x);
	float hi_y = fmax(p.y, q.y);
	return float2(hi_x, hi_y);
}
float3 fmax(float3 p, float3 q)
{
	float hi_x = fmax(p.x, q.x);
	float hi_y = fmax(p.y, q.y);
	float hi_z = fmax(p.z, q.z);
	return float3(hi_x, hi_y, hi_z);
}
float4 fmax(float4 p, float4 q)
{
	float hi_x = fmax(p.x, q.x);
	float hi_y = fmax(p.y, q.y);
	float hi_z = fmax(p.z, q.z);
	float hi_w = fmax(p.w, q.w);
	return float4(hi_x, hi_y, hi_z, hi_w);
}
// Component-wise minimum of two vectors (2, 3 and 4 components).
float2 fmin(float2 p, float2 q)
{
	float lo_x = fmin(p.x, q.x);
	float lo_y = fmin(p.y, q.y);
	return float2(lo_x, lo_y);
}
float3 fmin(float3 p, float3 q)
{
	float lo_x = fmin(p.x, q.x);
	float lo_y = fmin(p.y, q.y);
	float lo_z = fmin(p.z, q.z);
	return float3(lo_x, lo_y, lo_z);
}
float4 fmin(float4 p, float4 q)
{
	float lo_x = fmin(p.x, q.x);
	float lo_y = fmin(p.y, q.y);
	float lo_z = fmin(p.z, q.z);
	float lo_w = fmin(p.w, q.w);
	return float4(lo_x, lo_y, lo_z, lo_w);
}
/*
float dot( float2 p, float2 q ) { return p.x*q.x + p.y*q.y; } float dot( float3 p, float3 q ) { return p.x*q.x + p.y*q.y + p.z*q.z; } float dot( float4 p, float4 q ) { return p.x*q.x + p.y*q.y +
p.z*q.z + p.w*q.w; } */
// 3D cross product p x q.
float3 cross(float3 p, float3 q)
{
	float out_x = p.y * q.z - p.z * q.y;
	float out_y = p.z * q.x - p.x * q.z;
	float out_z = p.x * q.y - p.y * q.x;
	return float3(out_x, out_y, out_z);
}
// Cross product of the first three components of p and q; the fourth
// component of the result is 0.
float4 cross(float4 p, float4 q)
{
	float out_x = p.y * q.z - p.z * q.y;
	float out_y = p.z * q.x - p.x * q.z;
	float out_z = p.x * q.y - p.y * q.x;
	return float4(out_x, out_y, out_z, 0.0f);
}
// Euclidean length of a vector: sqrt(dot(p, p)).
float length(float2 p)
{
	float squared = dot(p, p);
	return sqrt(squared);
}
float length(float3 p)
{
	float squared = dot(p, p);
	return sqrt(squared);
}
float length(float4 p)
{
	float squared = dot(p, p);
	return sqrt(squared);
}
// Squared Euclidean length of a vector (no square root taken).
float length_sqr(float2 p)
{
	float squared = dot(p, p);
	return squared;
}
float length_sqr(float3 p)
{
	float squared = dot(p, p);
	return squared;
}
float length_sqr(float4 p)
{
	float squared = dot(p, p);
	return squared;
}
// Euclidean distance between two points: the length of (q - p).
float distance(float2 p, float2 q)
{
	float2 delta = q - p;
	return length(delta);
}
float distance(float3 p, float3 q)
{
	float3 delta = q - p;
	return length(delta);
}
float distance(float4 p, float4 q)
{
	float4 delta = q - p;
	return length(delta);
}
// Squared Euclidean distance between two points: length_sqr(q - p).
float distance_sqr(float2 p, float2 q)
{
	float2 delta = q - p;
	return length_sqr(delta);
}
float distance_sqr(float3 p, float3 q)
{
	float3 delta = q - p;
	return length_sqr(delta);
}
float distance_sqr(float4 p, float4 q)
{
	float4 delta = q - p;
	return length_sqr(delta);
}
// Scales a vector by the reciprocal of its length. No check is made for a
// zero-length input; the division is performed as-is.
float2 normalize(float2 p)
{
	float magnitude = length(p);
	return p / magnitude;
}
float3 normalize(float3 p)
{
	float magnitude = length(p);
	return p / magnitude;
}
float4 normalize(float4 p)
{
	float magnitude = length(p);
	return p / magnitude;
}
/**************************************************
matrix functions, for 2x2, 3x3 and 4x4 matrices:
* trace
* determinant
* transform
* inverse
* adjugate
* characteristic polynomial
* eigenvalue
* eigenvector
additionally, root solver
for 2nd, 3rd and 4th degree monic polynomials.
*************************************************/
/*
struct mat2 { float2 v[2]; };
struct mat3 { float3 v[3]; };
struct mat4 { float4 v[4]; };
*/
// Trace of a matrix: the sum of its diagonal elements.
float trace(mat2 p)
{
	float diag_sum = p.v[0].x;
	diag_sum = diag_sum + p.v[1].y;
	return diag_sum;
}
float trace(mat3 p)
{
	float diag_sum = p.v[0].x;
	diag_sum = diag_sum + p.v[1].y;
	diag_sum = diag_sum + p.v[2].z;
	return diag_sum;
}
float trace(mat4 p)
{
	float diag_sum = p.v[0].x;
	diag_sum = diag_sum + p.v[1].y;
	diag_sum = diag_sum + p.v[2].z;
	diag_sum = diag_sum + p.v[3].w;
	return diag_sum;
}
// 2x2 determinant: main-diagonal product minus anti-diagonal product.
float determinant(mat2 p)
{
	float main_diag = p.v[0].x * p.v[1].y;
	float anti_diag = p.v[0].y * p.v[1].x;
	return main_diag - anti_diag;
}
// 3x3 determinant as the scalar triple product row0 . (row1 x row2).
float determinant(mat3 p)
{
	float3 row12_cross = cross(p.v[1], p.v[2]);
	return dot(p.v[0], row12_cross);
}
// 4x4 determinant by expansion along row 0: each term is a signed 3x3
// determinant built from rows 1..3 with one column left out.
float determinant(mat4 p)
{
	float cof0 = dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw));
	float cof1 = -dot(p.v[1].xzw, cross(p.v[2].xzw, p.v[3].xzw));
	float cof2 = dot(p.v[1].xyw, cross(p.v[2].xyw, p.v[3].xyw));
	float cof3 = -dot(p.v[1].xyz, cross(p.v[2].xyz, p.v[3].xyz));
	return dot(p.v[0], float4(cof0, cof1, cof2, cof3));
}
/*
	Characteristic polynomials of 2x2, 3x3 and 4x4 matrices. The polynomials are
	monic: the leading coefficient is 1 and is not stored. Coefficients are
	returned lowest degree first, so the first component is the constant term. */
float2 characteristic_poly(mat2 p)
{
	float constant_term = determinant(p);
	float linear_term = -trace(p);
	return float2(constant_term, linear_term);
}
float3 characteristic_poly(mat3 p)
{
	// .x accumulates the diagonal products, .y the matching off-diagonal
	// products, of the three 2x2 blocks on the diagonal.
	float2 pair_terms = (p.v[0].xy * p.v[1].yx) + (p.v[0].xz * p.v[2].zx) + (p.v[1].yz * p.v[2].zy);
	float linear_term = pair_terms.x - pair_terms.y;
	return float3(-determinant(p), linear_term, -trace(p));
}
float4 characteristic_poly(mat4 p)
{
	// Same pairing as the 3x3 case, over all six index pairs.
	float2 pair_terms = (p.v[0].xy * p.v[1].yx) + (p.v[0].xz * p.v[2].zx) + (p.v[0].xw * p.v[3].wx) + (p.v[1].yz * p.v[2].zy) + (p.v[1].yw * p.v[3].wy) + (p.v[2].zw * p.v[3].wz);
	float quadratic_term = pair_terms.x - pair_terms.y;
	// Negated sum of the four 3x3 determinants obtained by dropping row i and column i.
	float linear_term = -dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw))
		- dot(p.v[0].xzw, cross(p.v[2].xzw, p.v[3].xzw)) - dot(p.v[0].xyw, cross(p.v[1].xyw, p.v[3].xyw)) - dot(p.v[0].xyz, cross(p.v[1].xyz, p.v[2].xyz));
	return float4(determinant(p), linear_term, quadratic_term, -trace(p));
}
/*
Root finders for monic polynomials (highest coefficient is equal to 1)
Returns a vector with length equal to the number of roots that the polynomial has;
for roots that do not genuinely exist, we return NaN.
The polynomial is basically
poly(n) = p.x + p.y*n + p.z*n^2 + p.w*n^3
(including only the components of the vector that actually exist; the next coefficient
has the value 1, and the remaining ones have value 0. )
*/
// Roots of the monic quadratic n^2 + p.y*n + p.x via the quadratic formula.
// A negative discriminant makes sqrt() yield NaN, so both roots come back NaN.
float2 solve_monic(float2 p)
{
	float disc_root = sqrt(p.y * p.y - 4 * p.x);
	float2 sums = float2(p.y + disc_root, p.y - disc_root);
	return sums * -0.5f;
}
// Solves the monic cubic n^3 + p.z*n^2 + p.y*n + p.x = 0 for its real roots.
// When the one-root branch is taken, the second and third components of the
// result are NaN; otherwise all three components hold roots.
// NOTE(review): if p.x, p.y and p.z are all zero, 'scal' is 0 and 'rscal'
// is infinite, so the rescaled coefficients evaluate 0 * inf -- confirm callers
// never pass the zero polynomial.
float3 solve_monic(float3 p)
{
p = p * (1.0f / 3.0f);
// keep the (already divided by 3) quadratic coefficient; it is subtracted
// from the roots at the end.
float pz = p.z;
// compute a normalization value to scale the vector by.
// The normalization factor is divided by 2^20.
// This is supposed to make internal calculations unlikely
// to overflow while also making underflows unlikely.
float scal = 1.0f;
float cx = static_cast < float >(cbrt(fabs(p.x)));
float cy = static_cast < float >(cbrt(fabs(p.y)));
scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f);
float rscal = 1.0f / scal;
// rescale each coefficient by the power of 1/scal matching its degree.
p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal);
// the trailing "div scal^k" notes record which power of 'scal' each
// intermediate has been divided by.
float bb = p.z * p.z; // div scal^2
float nq = bb - p.y; // div scal^2
float r = 1.5f * (p.y * p.z - p.x) - p.z * bb; // div scal^3
float nq3 = nq * nq * nq; // div scal^6
float r2 = r * r; // div scal^6
if (nq3 < r2)
{
// one root
float root = sqrt(r2 - nq3); // div scal^3
float s = static_cast < float >(cbrt(r + root)); // div scal
float t = static_cast < float >(cbrt(r - root)); // div scal
// undo the scaling and the shift by pz; the two missing roots are reported as NaN.
return float3((s + t) * scal - pz, nan(0), nan(0));
}
else
{
// three roots
float phi_r = inversesqrt(nq3); // div scal ^ -3
float phi_root = static_cast < float >(cbrt(phi_r * nq3)); // div scal
// acospi/cospi/sinpi work in units of pi, so theta is an angle in half-turns.
float theta = acospi(r * phi_r);
theta *= 1.0f / 3.0f;
float ncprod = phi_root * cospi(theta);
// 1.7320508... is sqrt(3).
float dev = 1.73205080756887729353f * phi_root * sinpi(theta);
return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz;
}
}
/*
* Solves the monic quartic n^4 + p.w*n^3 + p.z*n^2 + p.y*n + p.x = 0 by
* depressing it, solving an auxiliary cubic, and then solving two quadratics.
* Components of the result can be NaN where the quadratic solver yields NaN.
* This function is not overflow-safe. Use with care.
*/
float4 solve_monic(float4 p)
{
// step 1: depress the input polynomial
// 'bias' is the shift p.w / 4; it is subtracted from every root at the end.
float bias = p.w * 0.25f;
// qv and rv are combined below, Horner-style in p.w, to produce the
// coefficients qx of the depressed quartic.
float3 qv = float3((-3.0f / 256.0f) * p.w * p.w, (1.0f / 8.0f) * p.w, (-3.0 / 8.0f));
float3 rv = float3((1.0f / 16.0f) * p.z * p.w - (1.0f / 4.0f) * p.y, (-1.0f / 2.0f) * p.z, 0.0f);
float3 qx = float3(qv * p.w + rv) * p.w + p.xyz;
// step 2: solve a cubic equation to get hold of a parameter p.
float3 monicp = float3(-qx.y * qx.y, (qx.z * qx.z) - (4.0f * qx.x), 2.0f * qx.z);
// the fourth component (1e-37) is the "very small positive value" fallback
// used by the fmax() reduction below.
float4 v = float4(solve_monic(monicp), 1e-37f);
// the cubic equation may have multiple solutions; at least one of them
// is numerically at least nonnegative (but may have become negative as a result of
// a roundoff error). We use fmax() to extract this value or a very small positive value.
float2 v2 = fmax(v.xy, v.zw);
float p2 = fmax(v2.x, v2.y); // p^2
float pr = inversesqrt(p2); // 1/p
float pm = p2 * pr; // p
// step 3: use the solution for the cubic equation to set up two quadratic equations;
// these two equations then result in the 4 possible roots.
float f1 = qx.z + p2;
float f2 = qx.y * pr;
float s = 0.5f * (f1 + f2);
float q = 0.5f * (f1 - f2);
float4 res = float4(solve_monic(float2(q, pm)),
solve_monic(float2(s, -pm)));
// finally, order the results and apply the bias.
// (res.x != res.x) is true only when res.x is NaN; in that case the two
// root pairs are swapped so the second quadratic's roots come first.
if (res.x != res.x)
return res.zwxy - bias;
else
return res - bias;
}
// Matrix-vector product: component i of the result is dot(row i of p, q).
float2 transform(mat2 p, float2 q)
{
	float out0 = dot(p.v[0], q);
	float out1 = dot(p.v[1], q);
	return float2(out0, out1);
}
float3 transform(mat3 p, float3 q)
{
	float out0 = dot(p.v[0], q);
	float out1 = dot(p.v[1], q);
	float out2 = dot(p.v[2], q);
	return float3(out0, out1, out2);
}
float4 transform(mat4 p, float4 q)
{
	float out0 = dot(p.v[0], q);
	float out1 = dot(p.v[1], q);
	float out2 = dot(p.v[2], q);
	float out3 = dot(p.v[3], q);
	return float4(out0, out1, out2, out3);
}
// Adjugate of a 2x2 matrix: swap the diagonal entries, negate the off-diagonal ones.
mat2 adjugate(mat2 p)
{
	mat2 adj;
	float2 top = float2(p.v[1].y, -p.v[0].y);
	float2 bottom = float2(-p.v[1].x, p.v[0].x);
	adj.v[0] = top;
	adj.v[1] = bottom;
	return adj;
}
// Inverse of a 2x2 matrix: the adjugate scaled by 1 / determinant.
// No check is made for a zero determinant.
mat2 invert(mat2 p)
{
	float inv_det = 1.0f / determinant(p);
	float2 adj_top = float2(p.v[1].y, -p.v[0].y);
	float2 adj_bottom = float2(-p.v[1].x, p.v[0].x);
	mat2 inverse;
	inverse.v[0] = adj_top * inv_det;
	inverse.v[1] = adj_bottom * inv_det;
	return inverse;
}
// Adjugate of a 3x3 matrix. The cross products of row pairs are the rows of
// the cofactor matrix; writing them out column-wise transposes it.
mat3 adjugate(mat3 p)
{
	float3 cof_row0 = cross(p.v[1], p.v[2]);
	float3 cof_row1 = cross(p.v[2], p.v[0]);
	float3 cof_row2 = cross(p.v[0], p.v[1]);
	mat3 adj;
	adj.v[0] = float3(cof_row0.x, cof_row1.x, cof_row2.x);
	adj.v[1] = float3(cof_row0.y, cof_row1.y, cof_row2.y);
	adj.v[2] = float3(cof_row0.z, cof_row1.z, cof_row2.z);
	return adj;
}
// Inverse of a 3x3 matrix: transposed cofactor rows scaled by 1 / determinant.
// No check is made for a zero determinant.
mat3 invert(mat3 p)
{
	float3 row12_cross = cross(p.v[1], p.v[2]);
	// determinant = (row1 x row2) . row0
	float inv_det = 1.0f / dot(row12_cross, p.v[0]);
	float3 cof_row0 = row12_cross * inv_det;
	float3 cof_row1 = cross(p.v[2], p.v[0]) * inv_det;
	float3 cof_row2 = cross(p.v[0], p.v[1]) * inv_det;
	mat3 inverse;
	inverse.v[0] = float3(cof_row0.x, cof_row1.x, cof_row2.x);
	inverse.v[1] = float3(cof_row0.y, cof_row1.y, cof_row2.y);
	inverse.v[2] = float3(cof_row0.z, cof_row1.z, cof_row2.z);
	return inverse;
}
// Adjugate of a 4x4 matrix. Each entry is a signed 3x3 determinant, formed as a
// dot product between one row and the cross product of two other rows, with one
// column removed via the three-component swizzles (yzw, xzw, xyw, xyz).
mat4 adjugate(mat4 p)
{
mat4 res;
// bpcN: cross product of the bottom two rows (2 and 3) with column N removed.
// tpcN: cross product of the top two rows (0 and 1) with column N removed.
float3 bpc0 = cross(p.v[2].yzw, p.v[3].yzw);
float3 tpc0 = cross(p.v[0].yzw, p.v[1].yzw);
res.v[0] = float4(dot(bpc0, p.v[1].yzw), -dot(bpc0, p.v[0].yzw), dot(tpc0, p.v[3].yzw), -dot(tpc0, p.v[2].yzw));
float3 bpc1 = cross(p.v[2].xzw, p.v[3].xzw);
float3 tpc1 = cross(p.v[0].xzw, p.v[1].xzw);
// signs alternate from one output row to the next.
res.v[1] = float4(-dot(bpc1, p.v[1].xzw), dot(bpc1, p.v[0].xzw), -dot(tpc1, p.v[3].xzw), dot(tpc1, p.v[2].xzw));
float3 bpc2 = cross(p.v[2].xyw, p.v[3].xyw);
float3 tpc2 = cross(p.v[0].xyw, p.v[1].xyw);
res.v[2] = float4(dot(bpc2, p.v[1].xyw), -dot(bpc2, p.v[0].xyw), dot(tpc2, p.v[3].xyw), -dot(tpc2, p.v[2].xyw));
float3 bpc3 = cross(p.v[2].xyz, p.v[3].xyz);
float3 tpc3 = cross(p.v[0].xyz, p.v[1].xyz);
res.v[3] = float4(-dot(bpc3, p.v[1].xyz), dot(bpc3, p.v[0].xyz), -dot(tpc3, p.v[3].xyz), dot(tpc3, p.v[2].xyz));
return res;
}
// Inverse of a 4x4 matrix: the same signed 3x3 determinants as adjugate(mat4),
// each scaled by 1 / determinant. No check is made for a zero determinant.
mat4 invert(mat4 p)
{
// cross products between the bottom two rows
float3 bpc0 = cross(p.v[2].yzw, p.v[3].yzw);
float3 bpc1 = cross(p.v[2].xzw, p.v[3].xzw);
float3 bpc2 = cross(p.v[2].xyw, p.v[3].xyw);
float3 bpc3 = cross(p.v[2].xyz, p.v[3].xyz);
// dot-products for the top rows
// row1 holds the signed 3x3 determinants used to expand the determinant along
// row 0; the same values are reused as the first entry of each output row.
float4 row1 = float4(dot(bpc0, p.v[1].yzw),
-dot(bpc1, p.v[1].xzw),
dot(bpc2, p.v[1].xyw),
-dot(bpc3, p.v[1].xyz));
float det = dot(p.v[0], row1);
float rdet = 1.0f / det;
mat4 res;
// tpcN: cross product of the top two rows (0 and 1) with column N removed.
float3 tpc0 = cross(p.v[0].yzw, p.v[1].yzw);
res.v[0] = float4(row1.x, -dot(bpc0, p.v[0].yzw), dot(tpc0, p.v[3].yzw), -dot(tpc0, p.v[2].yzw)) * rdet;
float3 tpc1 = cross(p.v[0].xzw, p.v[1].xzw);
res.v[1] = float4(row1.y, dot(bpc1, p.v[0].xzw), -dot(tpc1, p.v[3].xzw), dot(tpc1, p.v[2].xzw)) * rdet;
float3 tpc2 = cross(p.v[0].xyw, p.v[1].xyw);
res.v[2] = float4(row1.z, -dot(bpc2, p.v[0].xyw), dot(tpc2, p.v[3].xyw), -dot(tpc2, p.v[2].xyw)) * rdet;
float3 tpc3 = cross(p.v[0].xyz, p.v[1].xyz);
res.v[3] = float4(row1.w, dot(bpc3, p.v[0].xyz), -dot(tpc3, p.v[3].xyz), dot(tpc3, p.v[2].xyz)) * rdet;
return res;
}
// Eigenvalues of a matrix, obtained as the roots of its characteristic
// polynomial. Components may be NaN where solve_monic() reports no root.
float2 eigenvalues(mat2 p)
{
	float2 poly = characteristic_poly(p);
	return solve_monic(poly);
}
float3 eigenvalues(mat3 p)
{
	float3 poly = characteristic_poly(p);
	return solve_monic(poly);
}
float4 eigenvalues(mat4 p)
{
	float4 poly = characteristic_poly(p);
	return solve_monic(poly);
}
// Eigenvector of a 2x2 matrix for the eigenvalue 'eigvl'. The diagonal is
// replaced by (eigvl - diagonal); whichever row then has the larger sum of
// absolute values is returned with its two components swapped. The result is
// not normalized.
float2 eigenvector(mat2 p, float eigvl)
{
	float diag0 = eigvl - p.v[0].x;
	float off0 = p.v[0].y;
	float off1 = p.v[1].x;
	float diag1 = eigvl - p.v[1].y;
	if (fabs(diag0) + fabs(off0) > fabs(off1) + fabs(diag1))
		return float2(off0, diag0);
	return float2(diag1, off1);
}
// Eigenvector of a 3x3 matrix for the eigenvalue 'eigvl'. The eigenvalue is
// subtracted from the diagonal, two cross products of adjacent rows are
// formed, and the longer of the two is returned (not normalized).
float3 eigenvector(mat3 p, float eigvl)
{
	float3 row0 = p.v[0];
	float3 row1 = p.v[1];
	float3 row2 = p.v[2];
	row0.x = row0.x - eigvl;
	row1.y = row1.y - eigvl;
	row2.z = row2.z - eigvl;
	float3 cand_a = cross(row0, row1);
	float3 cand_b = cross(row1, row2);
	// compare squared lengths; no square root is needed to pick the longer one.
	if (dot(cand_a, cand_a) > dot(cand_b, cand_b))
		return cand_a;
	return cand_b;
}
// Generalized cross product of three 4-component vectors. The result is
// perpendicular to p, q and r; whether it points in the "right" direction
// has not been tested.
float4 gcross(float4 p, float4 q, float4 r)
{
	float comp0 = dot(p.yzw, cross(q.yzw, r.yzw));
	float comp1 = -dot(p.xzw, cross(q.xzw, r.xzw));
	float comp2 = dot(p.xyw, cross(q.xyw, r.xyw));
	float comp3 = -dot(p.xyz, cross(q.xyz, r.xyz));
	return float4(comp0, comp1, comp2, comp3);
}
// Eigenvector of a 4x4 matrix for the eigenvalue 'eigvl'. The eigenvalue is
// subtracted from the diagonal, four candidate vectors are built from triples
// of rows, and the candidate with the largest squared length is returned.
// The result is not normalized.
float4 eigenvector(mat4 p, float eigvl)
{
float4 r0 = p.v[0];
float4 r1 = p.v[1];
float4 r2 = p.v[2];
float4 r3 = p.v[3];
r0.x = r0.x - eigvl;
r1.y = r1.y - eigvl;
r2.z = r2.z - eigvl;
r3.w = r3.w - eigvl;
// generate four candidate vectors using the generalized cross product.
// These will in general point in the same direction (or 180 degree opposite),
// however they will have different lengths. Pick the longest one.
// tpcN: cross product of rows 0 and 1 with column N removed.
float3 tpc0 = cross(r0.yzw, r1.yzw);
float3 tpc1 = cross(r0.xzw, r1.xzw);
float3 tpc2 = cross(r0.xyw, r1.xyw);
float3 tpc3 = cross(r0.xyz, r1.xyz);
// v1 combines rows 0, 1 and 2; v2 combines rows 0, 1 and 3.
float4 v1 = float4(dot(r2.yzw, tpc0),
-dot(r2.xzw, tpc1),
dot(r2.xyw, tpc2),
-dot(r2.xyz, tpc3));
float4 v2 = float4(dot(r3.yzw, tpc0),
-dot(r3.xzw, tpc1),
dot(r3.xyw, tpc2),
-dot(r3.xyz, tpc3));
// bpcN: cross product of rows 2 and 3 with column N removed.
float3 bpc0 = cross(r2.yzw, r3.yzw);
float3 bpc1 = cross(r2.xzw, r3.xzw);
float3 bpc2 = cross(r2.xyw, r3.xyw);
float3 bpc3 = cross(r2.xyz, r3.xyz);
// v3 combines rows 0, 2 and 3; v4 combines rows 1, 2 and 3.
float4 v3 = float4(dot(r0.yzw, bpc0),
-dot(r0.xzw, bpc1),
dot(r0.xyw, bpc2),
-dot(r0.xyz, bpc3));
float4 v4 = float4(dot(r1.yzw, bpc0),
-dot(r1.xzw, bpc1),
dot(r1.xyw, bpc2),
-dot(r1.xyz, bpc3));
// squared lengths of the four candidates.
float len1 = dot(v1, v1);
float len2 = dot(v2, v2);
float len3 = dot(v3, v3);
float len4 = dot(v4, v4);
if (fmax(len1, len2) > fmax(len3, len4))
return len1 > len2 ? v1 : v2;
else
return len3 > len4 ? v3 : v4;
}
// Matrix multiply. Row i of the product is the combination of b's rows
// weighted by the components of a's row i.
mat2 operator *(mat2 a, mat2 b)
{
	mat2 product;
	for (int row = 0; row < 2; row++)
		product.v[row] = a.v[row].x * b.v[0] + a.v[row].y * b.v[1];
	return product;
}
mat3 operator *(mat3 a, mat3 b)
{
	mat3 product;
	for (int row = 0; row < 3; row++)
		product.v[row] = a.v[row].x * b.v[0] + a.v[row].y * b.v[1] + a.v[row].z * b.v[2];
	return product;
}
mat4 operator *(mat4 a, mat4 b)
{
	mat4 product;
	for (int row = 0; row < 4; row++)
		product.v[row] = a.v[row].x * b.v[0] + a.v[row].y * b.v[1] + a.v[row].z * b.v[2] + a.v[row].w * b.v[3];
	return product;
}
/*************************
simple geometric functions
*************************/
// Parameter t of the point on the line (a + b*t) that is closest to 'point':
// the projection of (point - a) onto b, divided by |b|^2.
float param_nearest_on_line(float2 point, line2 line)
{
	float projected = dot(point - line.a, line.b);
	float dir_len_sqr = dot(line.b, line.b);
	return projected / dir_len_sqr;
}
float param_nearest_on_line(float3 point, line3 line)
{
	float projected = dot(point - line.a, line.b);
	float dir_len_sqr = dot(line.b, line.b);
	return projected / dir_len_sqr;
}
float param_nearest_on_line(float4 point, line4 line)
{
	float projected = dot(point - line.a, line.b);
	float dir_len_sqr = dot(line.b, line.b);
	return projected / dir_len_sqr;
}
// Distance from 'point' to the nearest point on 'line'.
float point_line_distance(float2 point, line2 line)
{
	float t = param_nearest_on_line(point, line);
	float2 nearest = line.a + line.b * t;
	return distance(point, nearest);
}
float point_line_distance(float3 point, line3 line)
{
	float t = param_nearest_on_line(point, line);
	float3 nearest = line.a + line.b * t;
	return distance(point, nearest);
}
float point_line_distance(float4 point, line4 line)
{
	float t = param_nearest_on_line(point, line);
	float4 nearest = line.a + line.b * t;
	return distance(point, nearest);
}
// Squared distance from 'point' to the nearest point on 'line'.
float point_line_distance_sqr(float2 point, line2 line)
{
	float t = param_nearest_on_line(point, line);
	float2 nearest = line.a + line.b * t;
	return distance_sqr(point, nearest);
}
float point_line_distance_sqr(float3 point, line3 line)
{
	float t = param_nearest_on_line(point, line);
	float3 nearest = line.a + line.b * t;
	return distance_sqr(point, nearest);
}
float point_line_distance_sqr(float4 point, line4 line)
{
	float t = param_nearest_on_line(point, line);
	float4 nearest = line.a + line.b * t;
	return distance_sqr(point, nearest);
}
// Signed distance from a point to a 3D plane: the offset from the plane's
// root point projected onto the plane normal.
float point_plane_3d_distance(float3 point, plane_3d plane)
{
	float3 offset = point - plane.root_point;
	return dot(offset, plane.normal);
}
// Signed distance from a point to a 4D hyperplane: the offset from the
// hyperplane's root point projected onto its normal.
float point_hyperplane_4d_distance(float4 point, hyperplane_4d plane)
{
	float4 offset = point - plane.root_point;
	return dot(offset, plane.normal);
}
// Builds a 3D plane through three points: point0 becomes the root point and
// the normal is the normalized cross product of the two edges leaving point0.
plane_3d generate_plane_from_points(float3 point0, float3 point1, float3 point2)
{
	float3 edge_a = point1 - point0;
	float3 edge_b = point2 - point0;
	plane_3d plane;
	plane.root_point = point0;
	plane.normal = normalize(cross(edge_a, edge_b));
	return plane;
}
// Builds a 4D hyperplane through four points: point0 becomes the root point
// and the normal is the normalized generalized cross product of the three
// edges leaving point0.
hyperplane_4d generate_hyperplane_from_points(float4 point0, float4 point1, float4 point2, float4 point3)
{
	float4 edge_a = point1 - point0;
	float4 edge_b = point2 - point0;
	float4 edge_c = point3 - point0;
	hyperplane_4d plane;
	plane.root_point = point0;
	plane.normal = normalize(gcross(edge_a, edge_b, edge_c));
	return plane;
}

200
3rdparty/astc/mathlib.h vendored Normal file
View File

@@ -0,0 +1,200 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012, 2018 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Internal math library declarations for ASTC codec.
*/
/*----------------------------------------------------------------------------*/
#ifndef MATHLIB_H_INCLUDED
#define MATHLIB_H_INCLUDED
#include "vectypes.h"
// basic OpenCL functions
float inversesqrt(float p);
float acospi(float p);
float sinpi(float p);
float cospi(float p);
float nan(int p);
// Scalar fmax/fmin fallbacks, declared only for pre-C++11 builds.
// NOTE(review): mathlib.cpp defines these only when _MSC_VER is also unset, so
// this guard is wider than the one around the definitions -- confirm that is
// intended.
#if __cplusplus < 201103L
float fmax(float p, float q);
float fmin(float p, float q);
#endif // C++11
float2 fmax(float2 p, float2 q);
float3 fmax(float3 p, float3 q);
float4 fmax(float4 p, float4 q);
float2 fmin(float2 p, float2 q);
float3 fmin(float3 p, float3 q);
float4 fmin(float4 p, float4 q);
/*
float dot( float2 p, float2 q );
float dot( float3 p, float3 q );
float dot( float4 p, float4 q );
*/
// Dot product of two vectors; products are accumulated from x through to the
// last component.
static inline float dot(float2 p, float2 q)
{
	float acc = p.x * q.x;
	acc = acc + p.y * q.y;
	return acc;
}
static inline float dot(float3 p, float3 q)
{
	float acc = p.x * q.x;
	acc = acc + p.y * q.y;
	acc = acc + p.z * q.z;
	return acc;
}
static inline float dot(float4 p, float4 q)
{
	float acc = p.x * q.x;
	acc = acc + p.y * q.y;
	acc = acc + p.z * q.z;
	acc = acc + p.w * q.w;
	return acc;
}
float3 cross(float3 p, float3 q);
float4 cross(float4 p, float4 q);
float length(float2 p);
float length(float3 p);
float length(float4 p);
float length_sqr(float2 p);
float length_sqr(float3 p);
float length_sqr(float4 p);
float distance(float2 p, float2 q);
float distance(float3 p, float3 q);
float distance(float4 p, float4 q);
float distance_sqr(float2 p, float2 q);
float distance_sqr(float3 p, float3 q);
float distance_sqr(float4 p, float4 q);
float2 normalize(float2 p);
float3 normalize(float3 p);
float4 normalize(float4 p);
// functions other than just basic OpenCL functions
float4 gcross(float4 p, float4 q, float4 r);
// Square matrices stored as an array of vectors. transform() takes the dot
// product of each v[i] with the input vector, so v[i] acts as row i.
// 2x2 matrix.
struct mat2
{
float2 v[2];
};
// 3x3 matrix.
struct mat3
{
float3 v[3];
};
// 4x4 matrix.
struct mat4
{
float4 v[4];
};
float trace(mat2 p);
float trace(mat3 p);
float trace(mat4 p);
float determinant(mat2 p);
float determinant(mat3 p);
float determinant(mat4 p);
float2 characteristic_poly(mat2 p);
float3 characteristic_poly(mat3 p);
float4 characteristic_poly(mat4 p);
float2 solve_monic(float2 p);
float3 solve_monic(float3 p);
float4 solve_monic(float4 p);
float2 transform(mat2 p, float2 q);
float3 transform(mat3 p, float3 q);
float4 transform(mat4 p, float4 q);
mat2 adjugate(mat2 p);
mat3 adjugate(mat3 p);
mat4 adjugate(mat4 p);
mat2 invert(mat2 p);
mat3 invert(mat3 p);
mat4 invert(mat4 p);
float2 eigenvalues(mat2 p);
float3 eigenvalues(mat3 p);
float4 eigenvalues(mat4 p);
float2 eigenvector(mat2 p, float eigvl);
float3 eigenvector(mat3 p, float eigvl);
float4 eigenvector(mat4 p, float eigvl);
mat2 operator *(mat2 a, mat2 b);
mat3 operator *(mat3 a, mat3 b);
mat4 operator *(mat4 a, mat4 b);
// parametric line, 2D: The line is given by line = a + b*t.
// 'a' is a point on the line and 'b' its direction; the helper functions do
// not require 'b' to be normalized (they divide by dot(b, b)).
struct line2
{
float2 a;
float2 b;
};
// parametric line, 3D
struct line3
{
float3 a;
float3 b;
};
// parametric line, 4D
struct line4
{
float4 a;
float4 b;
};
// plane/hyperplane defined by a point and a normal vector
// 3D plane: 'root_point' lies on the plane, 'normal' is its unit normal.
struct plane_3d
{
float3 root_point;
float3 normal; // normalized
};
// 4D hyperplane: 'root_point' lies on the hyperplane, 'normal' is its unit normal.
struct hyperplane_4d
{
float4 root_point;
float4 normal; // normalized
};
float param_nearest_on_line(float2 point, line2 line);
float param_nearest_on_line(float3 point, line3 line);
float param_nearest_on_line(float4 point, line4 line);
float point_line_distance(float2 point, line2 line);
float point_line_distance(float3 point, line3 line);
float point_line_distance(float4 point, line4 line);
float point_line_distance_sqr(float2 point, line2 line);
float point_line_distance_sqr(float3 point, line3 line);
float point_line_distance_sqr(float4 point, line4 line);
float point_plane_3d_distance(float3 point, plane_3d plane);
float point_hyperplane_4d_distance(float4 point, hyperplane_4d plane);
plane_3d generate_plane_from_points(float3 point0, float3 point1, float3 point2);
hyperplane_4d generate_hyperplane_from_points(float4 point0, float4 point1, float4 point2, float4 point3);
#endif

1
3rdparty/astc/readme.txt vendored Normal file
View File

@@ -0,0 +1 @@
Library version of astc-encoder, from https://github.com/andrewwillmott/astc-encoder.

398
3rdparty/astc/softfloat.cpp vendored Normal file
View File

@@ -0,0 +1,398 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Soft IEEE-754 floating point library.
*/
/*----------------------------------------------------------------------------*/
#include "softfloat.h"
#define SOFTFLOAT_INLINE
/******************************************
helper functions and their lookup tables
******************************************/
/* count leading zeros functions. Only used when the input is nonzero. */
/* The lookup table is only compiled in when none of the instruction-based
clz32() variants below applies; the first three branches are intentionally empty. */
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
#elif defined(__arm__) && defined(__ARMCC_VERSION)
#elif defined(__arm__) && defined(__GNUC__)
#else
/* table used for the slow default versions. */
/* clz_table[b] is the number of leading zero bits in the 8-bit value b
(8 for b == 0, 0 for b >= 128). */
static const uint8_t clz_table[256] =
{
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif
/*
	32-bit count-leading-zeros. A native instruction is used where one of the
	known compiler/target combinations is detected; otherwise the byte lookup
	table is used. Callers are expected to pass a nonzero value. */
SOFTFLOAT_INLINE uint32_t clz32(uint32_t inp)
{
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
	/* BSR gives the index of the highest set bit; bit 0 is OR-ed in so the
	   operand handed to the instruction is never zero. */
	uint32_t top_bit;
	__asm__("bsrl %1, %0": "=r"(top_bit):"r"(inp | 1));
	return 31 - top_bit;
#elif defined(__arm__) && defined(__ARMCC_VERSION)
	return __clz(inp);			/* armcc builtin */
#elif defined(__arm__) && defined(__GNUC__)
	uint32_t zero_count;
	__asm__("clz %0, %1": "=r"(zero_count):"r"(inp));
	return zero_count;
#else
	/* slow default version: narrow the value down to its top nonzero byte,
	   tracking how many leading zero bits sit above that byte. */
	uint32_t zeros_above = 24;
	if (inp >= UINT32_C(0x10000))
	{
		zeros_above -= 16;
		inp >>= 16;
	}
	if (inp >= UINT32_C(0x100))
	{
		zeros_above -= 8;
		inp >>= 8;
	}
	return zeros_above + clz_table[inp];
#endif
}
/* Right-shift 'inp' by 'shamt' bits, rounding to nearest with ties going to even. */
static SOFTFLOAT_INLINE uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
{
	const uint32_t unit = UINT32_C(1) << shamt;
	/* The bit that becomes the result's LSB; '| 1' makes it count as set when shamt is 0. */
	const uint32_t kept_lsb = (inp | UINT32_C(1)) & unit;
	uint32_t biased = inp + (unit >> 1);	/* added 0.5 ULP */
	if (kept_lsb == 0)
		biased -= 1;			/* even: subtract epsilon so an exact tie rounds down */
	return biased >> shamt;
}
/* Right-shift 'inp' by 'shamt' bits, rounding to nearest with ties rounded up. */
static SOFTFLOAT_INLINE uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
{
	const uint32_t half_unit = (UINT32_C(1) << shamt) >> 1;
	return (inp + half_unit) >> shamt;
}
/* Right-shift 'inp' by 'shamt' bits, rounding up whenever any shifted-out bit is set. */
static SOFTFLOAT_INLINE uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
{
	const uint32_t unit = UINT32_C(1) << shamt;
	return (inp + (unit - 1)) >> shamt;
}
/* convert from FP16 to FP32. */
/* Takes the raw FP16 bit pattern and returns the FP32 bit pattern. Zero,
Infinity, NaN (quietened) and denormals each get their own path below. */
sf32 sf16_to_sf32(sf16 inp)
{
uint32_t inpx = inp;
/*
This table contains, for every FP16 sign/exponent value combination,
the difference between the input FP16 value and the value obtained
by shifting the correct FP32 result right by 13 bits.
This table allows us to handle every case except denormals and NaN
with just 1 table lookup, 2 shifts and 1 add.
*/
/* WITH_MB marks an entry by also setting bit 31, so that the "res >= 0"
test below routes it to the special-case paths.
NOTE(review): (1 << 31) shifts into the sign bit of an int, and the
"res << 13" expressions below left-shift a possibly negative int32_t;
older C++ standards do not define these -- confirm the supported
compilers treat them as plain two's-complement shifts. */
#define WITH_MB(a) INT32_C((a) | (1 << 31))
/* indexed by the top 6 bits of the FP16 value: sign bit plus 5 exponent bits. */
static const int32_t tbl[64] =
{
WITH_MB(0x00000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), WITH_MB(0x38000),
WITH_MB(0x38000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), WITH_MB(0x70000)
};
int32_t res = tbl[inpx >> 10];
res += inpx;
/* the normal cases: the MSB of 'res' is not set. */
if (res >= 0) /* signed compare */
return res << 13;
/* Infinity and Zero: the bottom 10 bits of 'res' are clear. */
if ((res & UINT32_C(0x3FF)) == 0)
return res << 13;
/* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
if ((inpx & 0x7C00) != 0)
return (res << 13) | UINT32_C(0x400000);
/* the remaining cases are Denormals. */
{
uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
uint32_t mskval = inpx & UINT32_C(0x7FFF);
/* normalize: shift the mantissa's leading 1 up to bit 31, then derive the
FP32 exponent from the number of positions it was shifted. */
uint32_t leadingzeroes = clz32(mskval);
mskval <<= leadingzeroes;
return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
}
}
/* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */
/* 'inp' is the raw FP32 bit pattern and 'rmode' selects the rounding mode.
The value is first classified by its sign and exponent bits (table 'tab'),
the class is combined with 'rmode' into a case index, and the switch below
handles each case. Returns 0 if the index matches no case.
NOTE(review): from the per-case comments the rounding modes appear to be
numbered 0 = up, 1 = down, 2 = toward zero, 3 = nearest-even,
4 = nearest-away -- confirm against the roundmode enum in softfloat.h. */
sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
{
/* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */
/* The index is inp >> 23 (sign bit plus 8 exponent bits). Entries are multiples
of 5, leaving room for the rounding mode to be added. Positive inputs use
0 (exponent 0), 10 (result underflows), 20 (result is an FP16 denormal),
30 (normal), 40 (overflow) and 50 (Infinity/NaN); negative inputs use the
same classes offset by 5. */
static const uint8_t tab[512] = {
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,
5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
};
/* many of the cases below use a case-dependent magic constant. So we look up a magic constant before actually performing the switch. This table allows us to group cases, thereby minimizing code
size. */
/* One entry per case index (12 classes x 5 rounding modes).
NOTE(review): entry 36 (0x38001FFF) differs from its neighbours' 0x58...
prefix; the difference is bit 29, which lands on bit 16 after the >> 13
in the switch, so it only matters if sf16 is wider than 16 bits -- confirm. */
static const uint32_t tabx[60] = {
UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
};
uint32_t p;
/* case index = value class (from 'tab') + rounding mode. */
uint32_t idx = rmode + tab[inp >> 23];
uint32_t vlx = tabx[idx];
switch (idx)
{
/*
Positive or negative number which may be Infinity or NaN.
We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
(If we don't do this quieting, then a NaN that is distinguished only by having
its low-order bits set, would be turned into an INF.) */
case 50:
case 51:
case 52:
case 53:
case 54:
case 55:
case 56:
case 57:
case 58:
case 59:
/*
the input value is 0x7F800000 or 0xFF800000 if it is INF.
By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
For NaNs, however, this operation will keep bit 23 with the value 1.
We can then extract bit 23, and logical-OR bit 9 of the result with this
bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
of the mantissa is set.)
*/
p = (inp - 1) & UINT32_C(0x800000); /* zero if INF, nonzero if NaN. */
return ((inp + vlx) >> 13) | (p >> 14);
/*
positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
If it is, then return 0, else return 1 (the smallest representable nonzero number)
*/
case 0:
/*
-inp will set the MSB if the input number is nonzero.
Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
*/
return (uint32_t) (-(int32_t) inp) >> 31;
/*
negative, exponent = 0, round-mode == DOWN, need to check whether number is
actually 0. If it is, return 0x8000 ( float -0.0 )
Else return the smallest negative number ( 0x8001 ) */
case 6:
/*
in this case 'vlx' is 0x80000000. By subtracting the input value from it,
we obtain a value that is 0 if the input value is in fact zero and has
the MSB set if it isn't. We then right-shift the value by 31 places to
get a value that is 0 if the input is -0.0 and 1 otherwise.
*/
return ((vlx - inp) >> 31) + UINT32_C(0x8000);
/*
for all other cases involving underflow/overflow, we don't need to
do actual tests; we just return 'vlx'.
*/
case 1:
case 2:
case 3:
case 4:
case 5:
case 7:
case 8:
case 9:
case 10:
case 11:
case 12:
case 13:
case 14:
case 15:
case 16:
case 17:
case 18:
case 19:
case 40:
case 41:
case 42:
case 43:
case 44:
case 45:
case 46:
case 47:
case 48:
case 49:
return vlx;
/*
for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
from zero. for round-to-nearest away, the constant is 2^12, so that ties round away from zero.
for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
except for odd input numbers. For odd input numbers, we need to add 1 to the constant. */
/* normal number, all rounding modes except round-to-nearest-even: */
case 30:
case 31:
case 32:
case 34:
case 35:
case 36:
case 37:
case 39:
return (inp + vlx) >> 13;
/* normal number, round-to-nearest-even. */
case 33:
case 38:
p = inp + vlx;
/* add the bit that will become the result's LSB, so ties go up only when it is odd. */
p += (inp >> 13) & 1;
return p >> 13;
/*
the various denormal cases. These are not expected to be common, so their performance is a bit
less important. For each of these cases, we need to extract an exponent and a mantissa
(including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
sign of the resulting denormal number.
*/
case 21:
case 22:
case 25:
case 27:
/* denormal, round towards zero. */
p = 126 - ((inp >> 23) & 0xFF);
return (((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000)) >> p) | vlx;
case 20:
case 26:
/* denormal, round away from zero. */
p = 126 - ((inp >> 23) & 0xFF);
return rtup_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx;
case 24:
case 29:
/* denormal, round to nearest-away */
p = 126 - ((inp >> 23) & 0xFF);
return rtna_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx;
case 23:
case 28:
/* denormal, round to nearest-even. */
p = 126 - ((inp >> 23) & 0xFF);
return rtne_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx;
}
return 0;
}
/*
	Overlay of an unsigned 32-bit integer, a signed 32-bit integer and a native 'float'
	on the same storage. The conversion helpers below write one member and read another
	in order to move a raw bit pattern between the soft-float representation and 'float'
	without a numeric conversion.
	NOTE(review): this assumes 'float' is 32 bits wide - confirm for the target platform.
*/
typedef union if32_
{
uint32_t u; /* raw bit pattern, unsigned view */
int32_t s; /* raw bit pattern, signed view */
float f; /* the same storage viewed as a native float */
} if32;
/*
	soft-float -> native-float: widen the FP16 bit pattern to an FP32 bit pattern,
	then hand those bits back reinterpreted as a native 'float'.
*/
float sf16_to_float(sf16 p)
{
	const uint32_t widened = sf16_to_sf32(p);
	if32 bits;
	bits.u = widened;
	return bits.f;
}
/*
	native-float -> soft-float: take the bit pattern of the native 'float' as an FP32
	soft-float value and narrow it to FP16 using the rounding mode 'rm'.
*/
sf16 float_to_sf16(float p, roundmode rm)
{
	if32 bits;
	bits.f = p;
	const uint32_t raw = bits.u;
	return sf32_to_sf16(raw, rm);
}

95
3rdparty/astc/softfloat.h vendored Normal file
View File

@@ -0,0 +1,95 @@
/*----------------------------------------------------------------------------*/
/**
* This confidential and proprietary software may be used only as
* authorised by a licensing agreement from ARM Limited
* (C) COPYRIGHT 2011-2012 ARM Limited
* ALL RIGHTS RESERVED
*
* The entire notice above must be reproduced on all authorised
* copies and copies may only be made to the extent permitted
* by a licensing agreement from ARM Limited.
*
* @brief Soft IEEE-754 floating point library.
*/
/*----------------------------------------------------------------------------*/
#ifndef SOFTFLOAT_H_INCLUDED
#define SOFTFLOAT_H_INCLUDED

/*
	Sized integer types. The system header is pulled in BEFORE the extern "C" block below,
	so that a system header is never processed with C linkage in effect.
*/
#if defined __cplusplus && !defined(_MSC_VER)
/* if compiling as C++, we need to define these macros in order to obtain all the macros in stdint.h .
   They are guarded so that a build which already defines them (e.g. on the command line)
   does not run into a macro redefinition. */
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#else
/* fallback for builds where stdint.h is not included by this header: declare the sized types directly. */
typedef unsigned char uint8_t;
typedef signed char int8_t;
typedef unsigned short uint16_t;
typedef signed short int16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
#endif

#if defined __cplusplus
extern "C"
{
#endif

/* helper defined elsewhere in the library.
   NOTE(review): presumably returns the number of leading zero bits of 'p' - confirm against the definition. */
uint32_t clz32(uint32_t p);

/* targets that don't have UINT32_C probably don't have the rest of C99s stdint.h */
#ifndef UINT32_C
#define PASTE(a) a
#define UINT64_C(a) PASTE(a##ULL)
#define UINT32_C(a) PASTE(a##U)
#define INT64_C(a) PASTE(a##LL)
#define INT32_C(a) a
#define PRIX32 "X"
#define PRId32 "d"
#define PRIu32 "u"
/* 64-bit integers take the 'll' length modifier; 'L' is only defined for floating-point conversions. */
#define PRIX64 "llX"
#define PRId64 "lld"
#define PRIu64 "llu"
#endif

/* sized soft-float types. These are mapped to the sized integer types of C99, instead of C's
floating-point types; this is because the library needs to maintain exact, bit-level control on all
operations on these data types. */
typedef uint16_t sf16;
typedef uint32_t sf32;

/* the five rounding modes that IEEE-754r defines */
typedef enum
{
	SF_UP = 0, /* round towards positive infinity */
	SF_DOWN = 1, /* round towards negative infinity */
	SF_TOZERO = 2, /* round towards zero */
	SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */
	SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */
} roundmode;

/* narrowing float->float conversions: FP32 bit pattern to FP16 bit pattern, using the given rounding mode */
sf16 sf32_to_sf16(sf32, roundmode);

/* widening float->float conversions: FP16 bit pattern to FP32 bit pattern */
sf32 sf16_to_sf32(sf16);

/* conversions between the native 'float' type and the FP16 soft-float type */
sf16 float_to_sf16(float, roundmode);
float sf16_to_float(sf16);

#if defined __cplusplus
}
#endif

#endif

16209
3rdparty/astc/vectypes.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,8 @@ project "bimg_encode"
path.join(BIMG_DIR, "3rdparty/nvtt/**.h"),
path.join(BIMG_DIR, "3rdparty/pvrtc/**.cpp"),
path.join(BIMG_DIR, "3rdparty/pvrtc/**.h"),
path.join(BIMG_DIR, "3rdparty/astc/**.cpp"),
path.join(BIMG_DIR, "3rdparty/astc/**.h"),
path.join(BIMG_DIR, "3rdparty/tinyexr/**.h"),
path.join(BIMG_DIR, "3rdparty/iqa/include/**.h"),
path.join(BIMG_DIR, "3rdparty/iqa/source/**.c"),

View File

@@ -9,27 +9,9 @@ project "texturec"
includedirs {
path.join(BX_DIR, "include"),
path.join(BIMG_DIR, "include"),
path.join(BIMG_DIR, "3rdparty"),
path.join(BIMG_DIR, "3rdparty/nvtt"),
path.join(BIMG_DIR, "3rdparty/iqa/include"),
}
files {
path.join(BIMG_DIR, "3rdparty/libsquish/**.cpp"),
path.join(BIMG_DIR, "3rdparty/libsquish/**.h"),
path.join(BIMG_DIR, "3rdparty/edtaa3/**.cpp"),
path.join(BIMG_DIR, "3rdparty/edtaa3/**.h"),
path.join(BIMG_DIR, "3rdparty/etc1/**.cpp"),
path.join(BIMG_DIR, "3rdparty/etc1/**.h"),
path.join(BIMG_DIR, "3rdparty/etc2/**.cpp"),
path.join(BIMG_DIR, "3rdparty/etc2/**.hpp"),
path.join(BIMG_DIR, "3rdparty/nvtt/**.cpp"),
path.join(BIMG_DIR, "3rdparty/nvtt/**.h"),
path.join(BIMG_DIR, "3rdparty/pvrtc/**.cpp"),
path.join(BIMG_DIR, "3rdparty/pvrtc/**.h"),
path.join(BIMG_DIR, "3rdparty/tinyexr/**.h"),
path.join(BIMG_DIR, "3rdparty/iqa/include/**.h"),
path.join(BIMG_DIR, "3rdparty/iqa/source/**.c"),
path.join(BIMG_DIR, "tools/texturec/**.cpp"),
path.join(BIMG_DIR, "tools/texturec/**.h"),
}

View File

@@ -19,6 +19,10 @@
BX_ERROR_RESULT(BIMG_ERROR, BX_MAKEFOURCC('b', 'i', 'm', 'g') );
#ifndef BIMG_CONFIG_ASTC_DECODE
#define BIMG_CONFIG_ASTC_DECODE 0
#endif
namespace bimg
{
struct Memory

View File

@@ -3,9 +3,15 @@
* License: https://github.com/bkaradzic/bimg#license-bsd-2-clause
*/
#define BIMG_CONFIG_ASTC_DECODE 1
#include "bimg_p.h"
#include <bx/hash.h>
#if BIMG_CONFIG_ASTC_DECODE
#include "../3rdparty/astc/astc_lib.h"
#endif
namespace bimg
{
static const ImageBlockInfo s_imageBlockInfo[] =
@@ -4476,8 +4482,24 @@ namespace bimg
case TextureFormat::ASTC8x5:
case TextureFormat::ASTC8x6:
case TextureFormat::ASTC10x5:
BX_WARN(false, "ASTC decoder is not implemented.");
# if BIMG_CONFIG_ASTC_DECODE
astc_decompress
(
(const uint8_t*) _src,
s_imageBlockInfo[_srcFormat].blockWidth,
s_imageBlockInfo[_srcFormat].blockHeight,
ASTC_DECODE_LDR_LINEAR,
_width,
_height,
(uint8_t*) _dst,
ASTC_BGRA,
_dstPitch
);
# else
BX_WARN(false, "ASTC decoder is not implemented.");
imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffff00) );
# endif
break;
case TextureFormat::RGBA8:
@@ -5179,8 +5201,9 @@ namespace bimg
{
BX_ERROR_SCOPE(_err);
uint32_t ddspf = UINT32_MAX;
uint32_t dxgiFormat = UINT32_MAX;
uint32_t ddspf = UINT32_MAX;
uint32_t dxgiFormat = UINT32_MAX;
uint32_t fourccFormat = UINT32_MAX;
for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDdsPixelFormat); ++ii)
{
@@ -5201,14 +5224,26 @@ namespace bimg
break;
}
}
if (UINT32_MAX == dxgiFormat)
{
BX_ERROR_SET(_err, BIMG_ERROR, "DDS: DXGI format not supported.");
return 0;
}
}
if (UINT32_MAX == ddspf && UINT32_MAX == dxgiFormat)
{
for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDdsFourccFormat); ++ii)
{
if (s_translateDdsFourccFormat[ii].m_textureFormat == _format)
{
fourccFormat = s_translateDdsFourccFormat[ii].m_format;
break;
}
}
}
if (UINT32_MAX == ddspf && UINT32_MAX == dxgiFormat && UINT32_MAX == fourccFormat)
{
BX_ERROR_SET(_err, BIMG_ERROR, "DDS: output format not supported.");
return 0;
}
const uint32_t bpp = getBitsPerPixel(_format);
uint32_t total = 0;
@@ -5254,9 +5289,14 @@ namespace bimg
{
total += bx::write(_writer, uint32_t(8*sizeof(uint32_t) ), _err); // pixelFormatSize
total += bx::write(_writer, uint32_t(DDPF_FOURCC), _err);
total += bx::write(_writer, uint32_t(DDS_DX10), _err);
total += bx::write(_writer, uint32_t(0), _err); // bitCount
total += bx::writeRep(_writer, 0, 4*sizeof(uint32_t), _err); // bitmask
if (UINT32_MAX != fourccFormat)
total += bx::write(_writer, fourccFormat, _err);
else
total += bx::write(_writer, uint32_t(DDS_DX10), _err);
total += bx::write(_writer, uint32_t(0), _err); // bitCount
total += bx::writeRep(_writer, 0, 4*sizeof(uint32_t), _err); // bitmask
}
uint32_t caps[4] =

View File

@@ -12,6 +12,7 @@
#include <nvtt/nvtt.h>
#include <pvrtc/PvrTcEncoder.h>
#include <edtaa3/edtaa3func.h>
#include <astc/astc_lib.h>
BX_PRAGMA_DIAGNOSTIC_PUSH();
BX_PRAGMA_DIAGNOSTIC_IGNORED_MSVC(4100) // warning C4100: 'alloc_context': unreferenced formal parameter
@@ -35,6 +36,14 @@ namespace bimg
};
BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_squishQuality) );
// ASTC encoder preset for each Quality::Enum value. The table is indexed directly with the
// quality value passed to imageEncodeFromRgba8, so the entry order must follow the order of
// the Quality enum; the static assert below only checks that the entry count equals Quality::Count.
static const ASTC_COMPRESS_MODE s_astcQuality[] =
{
ASTC_COMPRESS_MEDIUM, // Default
ASTC_COMPRESS_THOROUGH, // Highest
ASTC_COMPRESS_FAST, // Fastest
};
BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_astcQuality));
void imageEncodeFromRgba8(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err)
{
const uint8_t* src = (const uint8_t*)_src;
@@ -122,6 +131,22 @@ namespace bimg
}
break;
case TextureFormat::ASTC4x4:
case TextureFormat::ASTC5x5:
case TextureFormat::ASTC6x6:
case TextureFormat::ASTC8x5:
case TextureFormat::ASTC8x6:
case TextureFormat::ASTC10x5:
{
const bimg::ImageBlockInfo& astcBlockInfo = bimg::getBlockInfo(_format);
ASTC_COMPRESS_MODE compress_mode = s_astcQuality[_quality];
ASTC_DECODE_MODE decode_mode = ASTC_DECODE_LDR_LINEAR;
astc_compress(_width, _height, src, ASTC_RGBA, srcPitch, astcBlockInfo.blockWidth, astcBlockInfo.blockHeight, compress_mode, decode_mode, dst);
}
break;
case TextureFormat::BGRA8:
imageSwizzleBgra8(dst, dstPitch, _width, _height, src, srcPitch);
break;
@@ -200,15 +225,21 @@ namespace bimg
{
switch (_dstFormat)
{
case bimg::TextureFormat::BC1:
case bimg::TextureFormat::BC2:
case bimg::TextureFormat::BC3:
case bimg::TextureFormat::BC4:
case bimg::TextureFormat::BC5:
case bimg::TextureFormat::ETC1:
case bimg::TextureFormat::ETC2:
case bimg::TextureFormat::PTC14:
case bimg::TextureFormat::PTC14A:
case TextureFormat::BC1:
case TextureFormat::BC2:
case TextureFormat::BC3:
case TextureFormat::BC4:
case TextureFormat::BC5:
case TextureFormat::ETC1:
case TextureFormat::ETC2:
case TextureFormat::PTC14:
case TextureFormat::PTC14A:
case TextureFormat::ASTC4x4:
case TextureFormat::ASTC5x5:
case TextureFormat::ASTC6x6:
case TextureFormat::ASTC8x5:
case TextureFormat::ASTC8x6:
case TextureFormat::ASTC10x5:
{
uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, _width*_height*_depth*4);
imageDecodeToRgba8(_allocator, temp, _src, _width, _height, _width*4, _srcFormat);

View File

@@ -156,12 +156,12 @@ bimg::ImageContainer* convert(bx::AllocatorI* _allocator, const void* _inputData
const bimg::ImageBlockInfo& inputBlockInfo = bimg::getBlockInfo(inputFormat);
const bimg::ImageBlockInfo& outputBlockInfo = bimg::getBlockInfo(outputFormat);
const uint32_t blockWidth = outputBlockInfo.blockWidth;
const uint32_t blockHeight = outputBlockInfo.blockHeight;
const uint32_t minBlockX = outputBlockInfo.minBlockX;
const uint32_t minBlockY = outputBlockInfo.minBlockY;
uint32_t outputWidth = bx::uint32_max(blockWidth * minBlockX, ( (input->m_width + blockWidth - 1) / blockWidth )*blockWidth);
uint32_t outputHeight = bx::uint32_max(blockHeight * minBlockY, ( (input->m_height + blockHeight - 1) / blockHeight)*blockHeight);
uint32_t outputDepth = input->m_depth;
const uint32_t blockHeight = outputBlockInfo.blockHeight;
const uint32_t minBlockX = outputBlockInfo.minBlockX;
const uint32_t minBlockY = outputBlockInfo.minBlockY;
uint32_t outputWidth = bx::uint32_max(blockWidth * minBlockX, ( (input->m_width + blockWidth - 1) / blockWidth )*blockWidth);
uint32_t outputHeight = bx::uint32_max(blockHeight * minBlockY, ( (input->m_height + blockHeight - 1) / blockHeight)*blockHeight);
uint32_t outputDepth = input->m_depth;
if (_options.equirect)
{
@@ -842,10 +842,11 @@ void help(const char* _error = NULL, bool _showHelp = true)
" aspect ratio will be preserved.\n"
" --radiance <model> Radiance cubemap filter. (Lighting model: Phong, PhongBrdf, Blinn, BlinnBrdf, GGX)\n"
" --as <extension> Save as.\n"
" --formats List all supported formats.\n"
" --validate *DEBUG* Validate that output image produced matches after loading.\n"
"\n"
"For additional information, see https://github.com/bkaradzic/bgfx\n"
"For additional information, see https://github.com/bkaradzic/bimg\n"
);
}
@@ -909,6 +910,24 @@ int main(int _argc, const char* _argv[])
return bx::kExitFailure;
}
if (cmdLine.hasArg("formats"))
{
printf("Uncompressed formats:\n");
for (int format = bimg::TextureFormat::Unknown + 1; format < bimg::TextureFormat::UnknownDepth; format++)
printf(" %s\n", bimg::getName((bimg::TextureFormat::Enum) format));
for (int format = bimg::TextureFormat::UnknownDepth + 1; format < bimg::TextureFormat::Count; format++)
printf(" %s\n", bimg::getName((bimg::TextureFormat::Enum) format));
printf("Compressed formats:\n");
for (int format = 0; format < bimg::TextureFormat::Unknown; format++)
printf(" %s\n", bimg::getName((bimg::TextureFormat::Enum) format));
return bx::kExitSuccess;
}
const char* inputFileName = cmdLine.findOption('f');
if (NULL == inputFileName)
{