mirror of
https://github.com/bkaradzic/bimg.git
synced 2026-02-17 20:52:38 +01:00
ASTC encoding support
- Add 3rdparty/astc with lib version of the standard encoder - Add astc_compress call for ASTC formats - Add BIMG_CONFIG_ASTC_DECODE-gated decompression support. This is just for testing, the decompress code is currently too heavy to include in the core lib. - Add fourcc support for DDS decode so ASTC and other formats not covered by dxgi can be read - Add --formats option to texturec, lists all supported formats - Update genie files -- add astc to bimg_encode and remove redundant files from texturec
This commit is contained in:
627
3rdparty/astc/astc_averages_and_directions.cpp
vendored
Normal file
627
3rdparty/astc/astc_averages_and_directions.cpp
vendored
Normal file
@@ -0,0 +1,627 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Implements functions for finding dominant direction of a set of
|
||||
* colors, using ARM patent pending method.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include <math.h>
|
||||
#include "mathlib.h"
|
||||
|
||||
#ifdef DEBUG_CAPTURE_NAN
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
/* routines to compute average colors and dominant directions for blocks with 3 and 4 components. */
|
||||
|
||||
/*
|
||||
for a full block, functions to compute averages and dominant directions. The averages and directions are computed separately for each partition.
|
||||
We have separate versions for blocks with and without alpha, since the processing for blocks with alpha is significantly more expensive.
|
||||
The direction vectors it produces are NOT normalized.
|
||||
*/
|
||||
void compute_averages_and_directions_rgba(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb,
|
||||
const float4 * color_scalefactors,
|
||||
float4 * averages, float4 * directions_rgba, float3 * directions_gba, float3 * directions_rba, float3 * directions_rga, float3 * directions_rgb)
|
||||
{
|
||||
int i;
|
||||
int partition_count = pt->partition_count;
|
||||
int partition;
|
||||
|
||||
for (partition = 0; partition < partition_count; partition++)
|
||||
{
|
||||
const uint8_t *weights = pt->texels_of_partition[partition];
|
||||
int texelcount = pt->texels_per_partition[partition];
|
||||
|
||||
float4 base_sum = float4(0, 0, 0, 0);
|
||||
float partition_weight = 0.0f;
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = ewb->texel_weight[iwt];
|
||||
float4 texel_datum = float4(blk->work_data[4 * iwt],
|
||||
blk->work_data[4 * iwt + 1],
|
||||
blk->work_data[4 * iwt + 2],
|
||||
blk->work_data[4 * iwt + 3]) * weight;
|
||||
partition_weight += weight;
|
||||
|
||||
base_sum = base_sum + texel_datum;
|
||||
}
|
||||
|
||||
float4 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
|
||||
averages[partition] = average * color_scalefactors[partition];
|
||||
|
||||
|
||||
float4 sum_xp = float4(0, 0, 0, 0);
|
||||
float4 sum_yp = float4(0, 0, 0, 0);
|
||||
float4 sum_zp = float4(0, 0, 0, 0);
|
||||
float4 sum_wp = float4(0, 0, 0, 0);
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = ewb->texel_weight[iwt];
|
||||
float4 texel_datum = float4(blk->work_data[4 * iwt],
|
||||
blk->work_data[4 * iwt + 1],
|
||||
blk->work_data[4 * iwt + 2],
|
||||
blk->work_data[4 * iwt + 3]);
|
||||
texel_datum = (texel_datum - average) * weight;
|
||||
|
||||
if (texel_datum.x > 0.0f)
|
||||
sum_xp = sum_xp + texel_datum;
|
||||
if (texel_datum.y > 0.0f)
|
||||
sum_yp = sum_yp + texel_datum;
|
||||
if (texel_datum.z > 0.0f)
|
||||
sum_zp = sum_zp + texel_datum;
|
||||
if (texel_datum.w > 0.0f)
|
||||
sum_wp = sum_wp + texel_datum;
|
||||
}
|
||||
|
||||
float prod_xp = dot(sum_xp, sum_xp);
|
||||
float prod_yp = dot(sum_yp, sum_yp);
|
||||
float prod_zp = dot(sum_zp, sum_zp);
|
||||
float prod_wp = dot(sum_wp, sum_wp);
|
||||
|
||||
float4 best_vector = sum_xp;
|
||||
float best_sum = prod_xp;
|
||||
if (prod_yp > best_sum)
|
||||
{
|
||||
best_vector = sum_yp;
|
||||
best_sum = prod_yp;
|
||||
}
|
||||
if (prod_zp > best_sum)
|
||||
{
|
||||
best_vector = sum_zp;
|
||||
best_sum = prod_zp;
|
||||
}
|
||||
if (prod_wp > best_sum)
|
||||
{
|
||||
best_vector = sum_wp;
|
||||
best_sum = prod_wp;
|
||||
}
|
||||
|
||||
directions_rgba[partition] = best_vector;
|
||||
directions_rgb[partition] = best_vector.xyz;
|
||||
directions_rga[partition] = best_vector.xyw;
|
||||
directions_rba[partition] = best_vector.xzw;
|
||||
directions_gba[partition] = best_vector.yzw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void compute_averages_and_directions_rgb(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb,
|
||||
const float4 * color_scalefactors, float3 * averages, float3 * directions_rgb, float2 * directions_rg, float2 * directions_rb, float2 * directions_gb)
|
||||
{
|
||||
int i;
|
||||
int partition_count = pt->partition_count;
|
||||
int partition;
|
||||
|
||||
const float *texel_weights = ewb->texel_weight_rgb;
|
||||
|
||||
for (partition = 0; partition < partition_count; partition++)
|
||||
{
|
||||
const uint8_t *weights = pt->texels_of_partition[partition];
|
||||
int texelcount = pt->texels_per_partition[partition];
|
||||
|
||||
float3 base_sum = float3(0, 0, 0);
|
||||
float partition_weight = 0.0f;
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = texel_weights[iwt];
|
||||
float3 texel_datum = float3(blk->work_data[4 * iwt],
|
||||
blk->work_data[4 * iwt + 1],
|
||||
blk->work_data[4 * iwt + 2]) * weight;
|
||||
partition_weight += weight;
|
||||
|
||||
base_sum = base_sum + texel_datum;
|
||||
}
|
||||
|
||||
float4 csf = color_scalefactors[partition];
|
||||
float3 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
|
||||
averages[partition] = average * csf.xyz;
|
||||
|
||||
|
||||
float3 sum_xp = float3(0, 0, 0);
|
||||
float3 sum_yp = float3(0, 0, 0);
|
||||
float3 sum_zp = float3(0, 0, 0);
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = texel_weights[iwt];
|
||||
float3 texel_datum = float3(blk->work_data[4 * iwt],
|
||||
blk->work_data[4 * iwt + 1],
|
||||
blk->work_data[4 * iwt + 2]);
|
||||
texel_datum = (texel_datum - average) * weight;
|
||||
|
||||
if (texel_datum.x > 0.0f)
|
||||
sum_xp = sum_xp + texel_datum;
|
||||
if (texel_datum.y > 0.0f)
|
||||
sum_yp = sum_yp + texel_datum;
|
||||
if (texel_datum.z > 0.0f)
|
||||
sum_zp = sum_zp + texel_datum;
|
||||
}
|
||||
|
||||
float prod_xp = dot(sum_xp, sum_xp);
|
||||
float prod_yp = dot(sum_yp, sum_yp);
|
||||
float prod_zp = dot(sum_zp, sum_zp);
|
||||
|
||||
float3 best_vector = sum_xp;
|
||||
float best_sum = prod_xp;
|
||||
if (prod_yp > best_sum)
|
||||
{
|
||||
best_vector = sum_yp;
|
||||
best_sum = prod_yp;
|
||||
}
|
||||
if (prod_zp > best_sum)
|
||||
{
|
||||
best_vector = sum_zp;
|
||||
best_sum = prod_zp;
|
||||
}
|
||||
|
||||
directions_rgb[partition] = best_vector;
|
||||
directions_gb[partition] = best_vector.yz;
|
||||
directions_rb[partition] = best_vector.xz;
|
||||
directions_rg[partition] = best_vector.xy;
|
||||
}
|
||||
}
|
||||
|
||||
void compute_averages_and_directions_3_components(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb,
|
||||
const float3 * color_scalefactors, int component1, int component2, int component3, float3 * averages, float3 * directions)
|
||||
{
|
||||
int i;
|
||||
int partition_count = pt->partition_count;
|
||||
int partition;
|
||||
|
||||
const float *texel_weights;
|
||||
if (component1 == 1 && component2 == 2 && component3 == 3)
|
||||
texel_weights = ewb->texel_weight_gba;
|
||||
else if (component1 == 0 && component2 == 2 && component3 == 3)
|
||||
texel_weights = ewb->texel_weight_rba;
|
||||
else if (component1 == 0 && component2 == 1 && component3 == 3)
|
||||
texel_weights = ewb->texel_weight_rga;
|
||||
else if (component1 == 0 && component2 == 1 && component3 == 2)
|
||||
texel_weights = ewb->texel_weight_rgb;
|
||||
else
|
||||
{
|
||||
texel_weights = ewb->texel_weight_gba;
|
||||
ASTC_CODEC_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
for (partition = 0; partition < partition_count; partition++)
|
||||
{
|
||||
const uint8_t *weights = pt->texels_of_partition[partition];
|
||||
int texelcount = pt->texels_per_partition[partition];
|
||||
|
||||
float3 base_sum = float3(0, 0, 0);
|
||||
float partition_weight = 0.0f;
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = texel_weights[iwt];
|
||||
float3 texel_datum = float3(blk->work_data[4 * iwt + component1],
|
||||
blk->work_data[4 * iwt + component2],
|
||||
blk->work_data[4 * iwt + component3]) * weight;
|
||||
partition_weight += weight;
|
||||
|
||||
base_sum = base_sum + texel_datum;
|
||||
}
|
||||
|
||||
float3 csf = color_scalefactors[partition];
|
||||
|
||||
float3 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
|
||||
averages[partition] = average * csf.xyz;
|
||||
|
||||
|
||||
float3 sum_xp = float3(0, 0, 0);
|
||||
float3 sum_yp = float3(0, 0, 0);
|
||||
float3 sum_zp = float3(0, 0, 0);
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = texel_weights[iwt];
|
||||
float3 texel_datum = float3(blk->work_data[4 * iwt + component1],
|
||||
blk->work_data[4 * iwt + component2],
|
||||
blk->work_data[4 * iwt + component3]);
|
||||
texel_datum = (texel_datum - average) * weight;
|
||||
|
||||
if (texel_datum.x > 0.0f)
|
||||
sum_xp = sum_xp + texel_datum;
|
||||
if (texel_datum.y > 0.0f)
|
||||
sum_yp = sum_yp + texel_datum;
|
||||
if (texel_datum.z > 0.0f)
|
||||
sum_zp = sum_zp + texel_datum;
|
||||
}
|
||||
|
||||
float prod_xp = dot(sum_xp, sum_xp);
|
||||
float prod_yp = dot(sum_yp, sum_yp);
|
||||
float prod_zp = dot(sum_zp, sum_zp);
|
||||
|
||||
float3 best_vector = sum_xp;
|
||||
float best_sum = prod_xp;
|
||||
if (prod_yp > best_sum)
|
||||
{
|
||||
best_vector = sum_yp;
|
||||
best_sum = prod_yp;
|
||||
}
|
||||
if (prod_zp > best_sum)
|
||||
{
|
||||
best_vector = sum_zp;
|
||||
best_sum = prod_zp;
|
||||
}
|
||||
|
||||
if (dot(best_vector, best_vector) < 1e-18)
|
||||
best_vector = float3(1, 1, 1);
|
||||
directions[partition] = best_vector;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void compute_averages_and_directions_2_components(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb, const float2 * color_scalefactors, int component1, int component2, float2 * averages, float2 * directions)
|
||||
{
|
||||
int i;
|
||||
int partition_count = pt->partition_count;
|
||||
int partition;
|
||||
|
||||
const float *texel_weights;
|
||||
if (component1 == 0 && component2 == 1)
|
||||
texel_weights = ewb->texel_weight_rg;
|
||||
else if (component1 == 0 && component2 == 2)
|
||||
texel_weights = ewb->texel_weight_rb;
|
||||
else if (component1 == 1 && component2 == 2)
|
||||
texel_weights = ewb->texel_weight_gb;
|
||||
else
|
||||
{
|
||||
texel_weights = ewb->texel_weight_rg;
|
||||
// unsupported set of color components.
|
||||
ASTC_CODEC_INTERNAL_ERROR;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
for (partition = 0; partition < partition_count; partition++)
|
||||
{
|
||||
const uint8_t *weights = pt->texels_of_partition[partition];
|
||||
int texelcount = pt->texels_per_partition[partition];
|
||||
|
||||
float2 base_sum = float2(0, 0);
|
||||
float partition_weight = 0.0f;
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = texel_weights[iwt];
|
||||
float2 texel_datum = float2(blk->work_data[4 * iwt + component1],
|
||||
blk->work_data[4 * iwt + component2]) * weight;
|
||||
partition_weight += weight;
|
||||
|
||||
base_sum = base_sum + texel_datum;
|
||||
}
|
||||
|
||||
float2 csf = color_scalefactors[partition];
|
||||
|
||||
float2 average = base_sum * 1.0f / MAX(partition_weight, 1e-7f);
|
||||
averages[partition] = average * csf.xy;
|
||||
|
||||
|
||||
float2 sum_xp = float2(0, 0);
|
||||
float2 sum_yp = float2(0, 0);
|
||||
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float weight = texel_weights[iwt];
|
||||
float2 texel_datum = float2(blk->work_data[4 * iwt + component1],
|
||||
blk->work_data[4 * iwt + component2]);
|
||||
texel_datum = (texel_datum - average) * weight;
|
||||
|
||||
if (texel_datum.x > 0.0f)
|
||||
sum_xp = sum_xp + texel_datum;
|
||||
if (texel_datum.y > 0.0f)
|
||||
sum_yp = sum_yp + texel_datum;
|
||||
}
|
||||
|
||||
float prod_xp = dot(sum_xp, sum_xp);
|
||||
float prod_yp = dot(sum_yp, sum_yp);
|
||||
|
||||
float2 best_vector = sum_xp;
|
||||
float best_sum = prod_xp;
|
||||
if (prod_yp > best_sum)
|
||||
{
|
||||
best_vector = sum_yp;
|
||||
best_sum = prod_yp;
|
||||
}
|
||||
|
||||
directions[partition] = best_vector;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Two-level token pasting so that macro arguments are expanded before being glued together. */
#define XPASTE(x,y) x##y
#define PASTE(x,y) XPASTE(x,y)

/*
	TWO_COMPONENT_ERROR_FUNC generates a function 'funcname' that computes the
	weighted squared error of a block against one line per partition, using
	only two color channels.

	  c0_iwt, c1_iwt  channel offsets into the 4-channel blk->work_data
	  c01_name        swizzle applied to the float4 error weights
	  c01_rname       suffix of the ewb->texel_weight_* array for this pair

	For each texel the point is projected onto the partition's line
	(param = dot(point, l.bs)), reconstructed (l.amod + param * l.bis) and the
	per-channel squared distance is weighted with ewb->error_weights.
	length_of_lines[partition] receives the spread (max - min) of 'param',
	clamped to at least 1e-7 (also used when no texel contributed).

	When the block contains zero-weight texels, texels whose weight is not
	above 1e-20 are skipped. The weight is looked up with the texel index
	'iwt' (the partition-local counter 'i' would address an unrelated texel).
*/
#define TWO_COMPONENT_ERROR_FUNC( funcname, c0_iwt, c1_iwt, c01_name, c01_rname ) \
float funcname( \
	const partition_info *pt, \
	const imageblock *blk, \
	const error_weight_block *ewb, \
	const processed_line2 *plines, \
	float *length_of_lines \
	) \
{ \
	int i; \
	float errorsum = 0.0f; \
	int partition; \
	for(partition=0; partition<pt->partition_count; partition++) \
	{ \
		const uint8_t *weights = pt->texels_of_partition[ partition ]; \
		int texelcount = pt->texels_per_partition[ partition ]; \
		float lowparam = 1e10f; \
		float highparam = -1e10f; \
		processed_line2 l = plines[partition]; \
		if( ewb->contains_zeroweight_texels ) \
		{ \
			for(i=0;i<texelcount;i++) \
			{ \
				int iwt = weights[i]; \
				/* weight arrays are indexed by texel, not by position within the partition */ \
				float texel_weight = ewb-> PASTE(texel_weight_ , c01_rname) [iwt]; \
				if( texel_weight > 1e-20f ) \
				{ \
					float2 point = float2(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt] ); \
					float param = dot( point, l.bs ); \
					float2 rp1 = l.amod + param*l.bis; \
					float2 dist = rp1 - point; \
					float4 ews = ewb->error_weights[iwt]; \
					errorsum += dot( ews. c01_name, dist*dist ); \
					if( param < lowparam ) lowparam = param; \
					if( param > highparam ) highparam = param; \
				} \
			} \
		} \
		else \
		{ \
			for(i=0;i<texelcount;i++) \
			{ \
				int iwt = weights[i]; \
				float2 point = float2(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt] ); \
				float param = dot( point, l.bs ); \
				float2 rp1 = l.amod + param*l.bis; \
				float2 dist = rp1 - point; \
				float4 ews = ewb->error_weights[iwt]; \
				errorsum += dot( ews. c01_name, dist*dist ); \
				if( param < lowparam ) lowparam = param; \
				if( param > highparam ) highparam = param; \
			} \
		} \
		float linelen = highparam - lowparam; \
		/* negated comparison also catches NaN and the "no texel processed" case */ \
		if( !(linelen > 1e-7f) ) \
			linelen = 1e-7f; \
		length_of_lines[partition] = linelen; \
	} \
	return errorsum; \
}
|
||||
|
||||
|
||||
// Two-channel error functions. Arguments: name, the two channel offsets into
// work_data, the matching swizzle of the float4 error weights, and the suffix
// of the ewb->texel_weight_* array.
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_rg, 0, 1, xy, rg)
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_rb, 0, 2, xz, rb)
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_gb, 1, 2, yz, gb)
// Channels 0 and 3 pair with error-weight components x and w; using 'zw'
// here would weight the channel-0 error with the channel-2 weight.
// NOTE(review): assumes float4 provides an 'xw' swizzle like the other
// two-component swizzles used above - confirm against the vector types header.
TWO_COMPONENT_ERROR_FUNC(compute_error_squared_ra, 0, 3, xw, ra)
|
||||
|
||||
// function to compute the error across a tile when using a particular set of
|
||||
// lines for a particular partitioning. Also compute the length of each
|
||||
// color-space line in each partitioning.
|
||||
|
||||
#define THREE_COMPONENT_ERROR_FUNC( funcname, c0_iwt, c1_iwt, c2_iwt, c012_name, c012_rname ) \
|
||||
float funcname( \
|
||||
const partition_info *pt, \
|
||||
const imageblock *blk, \
|
||||
const error_weight_block *ewb, \
|
||||
const processed_line3 *plines, \
|
||||
float *length_of_lines \
|
||||
) \
|
||||
{ \
|
||||
int i; \
|
||||
float errorsum = 0.0f; \
|
||||
int partition; \
|
||||
for(partition=0; partition<pt->partition_count; partition++) \
|
||||
{ \
|
||||
const uint8_t *weights = pt->texels_of_partition[ partition ]; \
|
||||
int texelcount = pt->texels_per_partition[ partition ]; \
|
||||
float lowparam = 1e10f; \
|
||||
float highparam = -1e10f; \
|
||||
processed_line3 l = plines[partition]; \
|
||||
if( ewb->contains_zeroweight_texels ) \
|
||||
{ \
|
||||
for(i=0;i<texelcount;i++) \
|
||||
{ \
|
||||
int iwt = weights[i]; \
|
||||
float texel_weight = ewb-> PASTE(texel_weight_ , c012_rname) [i]; \
|
||||
if( texel_weight > 1e-20f ) \
|
||||
{ \
|
||||
float3 point = float3(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt], blk->work_data[4*iwt + c2_iwt] ); \
|
||||
float param = dot( point, l.bs ); \
|
||||
float3 rp1 = l.amod + param*l.bis; \
|
||||
float3 dist = rp1 - point; \
|
||||
float4 ews = ewb->error_weights[iwt]; \
|
||||
errorsum += dot( ews. c012_name, dist*dist ); \
|
||||
if( param < lowparam ) lowparam = param; \
|
||||
if( param > highparam ) highparam = param; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for(i=0;i<texelcount;i++) \
|
||||
{ \
|
||||
int iwt = weights[i]; \
|
||||
float3 point = float3(blk->work_data[4*iwt + c0_iwt], blk->work_data[4*iwt + c1_iwt], blk->work_data[4*iwt + c2_iwt] ); \
|
||||
float param = dot( point, l.bs ); \
|
||||
float3 rp1 = l.amod + param*l.bis; \
|
||||
float3 dist = rp1 - point; \
|
||||
float4 ews = ewb->error_weights[iwt]; \
|
||||
errorsum += dot( ews. c012_name, dist*dist ); \
|
||||
if( param < lowparam ) lowparam = param; \
|
||||
if( param > highparam ) highparam = param; \
|
||||
} \
|
||||
} \
|
||||
float linelen = highparam - lowparam; \
|
||||
if( !(linelen > 1e-7f) ) \
|
||||
linelen = 1e-7f; \
|
||||
length_of_lines[partition] = linelen; \
|
||||
} \
|
||||
return errorsum; \
|
||||
}
|
||||
|
||||
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_gba, 1, 2, 3, yzw, gba)
|
||||
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_rba, 0, 2, 3, xzw, rba)
|
||||
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_rga, 0, 1, 3, xyw, rga)
|
||||
THREE_COMPONENT_ERROR_FUNC(compute_error_squared_rgb, 0, 1, 2, xyz, rgb)
|
||||
|
||||
float compute_error_squared_rgba(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line4 * plines, float *length_of_lines)
|
||||
{
|
||||
int i;
|
||||
|
||||
float errorsum = 0.0f;
|
||||
int partition;
|
||||
for (partition = 0; partition < pt->partition_count; partition++)
|
||||
{
|
||||
const uint8_t *weights = pt->texels_of_partition[partition];
|
||||
int texelcount = pt->texels_per_partition[partition];
|
||||
float lowparam = 1e10;
|
||||
float highparam = -1e10;
|
||||
|
||||
processed_line4 l = plines[partition];
|
||||
|
||||
if (ewb->contains_zeroweight_texels)
|
||||
{
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
if (ewb->texel_weight[iwt] > 1e-20)
|
||||
{
|
||||
float4 point = float4(blk->work_data[4 * iwt], blk->work_data[4 * iwt + 1], blk->work_data[4 * iwt + 2], blk->work_data[4 * iwt + 3]);
|
||||
float param = dot(point, l.bs);
|
||||
float4 rp1 = l.amod + param * l.bis;
|
||||
float4 dist = rp1 - point;
|
||||
float4 ews = ewb->error_weights[iwt];
|
||||
errorsum += dot(ews, dist * dist);
|
||||
if (param < lowparam)
|
||||
lowparam = param;
|
||||
if (param > highparam)
|
||||
highparam = param;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < texelcount; i++)
|
||||
{
|
||||
int iwt = weights[i];
|
||||
float4 point = float4(blk->work_data[4 * iwt], blk->work_data[4 * iwt + 1], blk->work_data[4 * iwt + 2], blk->work_data[4 * iwt + 3]);
|
||||
float param = dot(point, l.bs);
|
||||
float4 rp1 = l.amod + param * l.bis;
|
||||
float4 dist = rp1 - point;
|
||||
float4 ews = ewb->error_weights[iwt];
|
||||
errorsum += dot(ews, dist * dist);
|
||||
if (param < lowparam)
|
||||
lowparam = param;
|
||||
if (param > highparam)
|
||||
highparam = param;
|
||||
}
|
||||
}
|
||||
|
||||
float linelen = highparam - lowparam;
|
||||
if (!(linelen > 1e-7f))
|
||||
linelen = 1e-7f;
|
||||
length_of_lines[partition] = linelen;
|
||||
}
|
||||
|
||||
return errorsum;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// function to compute the error across a tile when using a particular line for
|
||||
// a particular partition.
|
||||
float compute_error_squared_rgb_single_partition(int partition_to_test, int xdim, int ydim, int zdim, const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * lin // the line for the partition.
|
||||
)
|
||||
{
|
||||
int i;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
float errorsum = 0.0f;
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
int partition = pt->partition_of_texel[i];
|
||||
float texel_weight = ewb->texel_weight_rgb[i];
|
||||
if (partition != partition_to_test || texel_weight < 1e-20)
|
||||
continue;
|
||||
float3 point = float3(blk->work_data[4 * i], blk->work_data[4 * i + 1], blk->work_data[4 * i + 2]);
|
||||
|
||||
float param = dot(point, lin->bs);
|
||||
float3 rp1 = lin->amod + param * lin->bis;
|
||||
float3 dist = rp1 - point;
|
||||
float4 ews = ewb->error_weights[i];
|
||||
|
||||
errorsum += dot(ews.xyz, dist * dist);
|
||||
}
|
||||
return errorsum;
|
||||
}
|
||||
977
3rdparty/astc/astc_block_sizes2.cpp
vendored
Normal file
977
3rdparty/astc/astc_block_sizes2.cpp
vendored
Normal file
@@ -0,0 +1,977 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief For ASTC, generate the block size descriptor and the associated
|
||||
* decimation tables.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
extern const float percentile_table_4x4[2048];
|
||||
extern const float percentile_table_4x5[2048];
|
||||
extern const float percentile_table_4x6[2048];
|
||||
extern const float percentile_table_4x8[2048];
|
||||
extern const float percentile_table_4x10[2048];
|
||||
extern const float percentile_table_4x12[2048];
|
||||
extern const float percentile_table_5x4[2048];
|
||||
extern const float percentile_table_5x5[2048];
|
||||
extern const float percentile_table_5x6[2048];
|
||||
extern const float percentile_table_5x8[2048];
|
||||
extern const float percentile_table_5x10[2048];
|
||||
extern const float percentile_table_5x12[2048];
|
||||
extern const float percentile_table_6x4[2048];
|
||||
extern const float percentile_table_6x5[2048];
|
||||
extern const float percentile_table_6x6[2048];
|
||||
extern const float percentile_table_6x8[2048];
|
||||
extern const float percentile_table_6x10[2048];
|
||||
extern const float percentile_table_6x12[2048];
|
||||
extern const float percentile_table_8x4[2048];
|
||||
extern const float percentile_table_8x5[2048];
|
||||
extern const float percentile_table_8x6[2048];
|
||||
extern const float percentile_table_8x8[2048];
|
||||
extern const float percentile_table_8x10[2048];
|
||||
extern const float percentile_table_8x12[2048];
|
||||
extern const float percentile_table_10x4[2048];
|
||||
extern const float percentile_table_10x5[2048];
|
||||
extern const float percentile_table_10x6[2048];
|
||||
extern const float percentile_table_10x8[2048];
|
||||
extern const float percentile_table_10x10[2048];
|
||||
extern const float percentile_table_10x12[2048];
|
||||
extern const float percentile_table_12x4[2048];
|
||||
extern const float percentile_table_12x5[2048];
|
||||
extern const float percentile_table_12x6[2048];
|
||||
extern const float percentile_table_12x8[2048];
|
||||
extern const float percentile_table_12x10[2048];
|
||||
extern const float percentile_table_12x12[2048];
|
||||
|
||||
const float *get_2d_percentile_table(int blockdim_x, int blockdim_y)
|
||||
{
|
||||
switch (blockdim_x)
|
||||
{
|
||||
case 4:
|
||||
switch (blockdim_y)
|
||||
{
|
||||
case 4:
|
||||
return percentile_table_4x4;
|
||||
case 5:
|
||||
return percentile_table_4x5;
|
||||
case 6:
|
||||
return percentile_table_4x6;
|
||||
case 8:
|
||||
return percentile_table_4x8;
|
||||
case 10:
|
||||
return percentile_table_4x10;
|
||||
case 12:
|
||||
return percentile_table_4x12;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
switch (blockdim_y)
|
||||
{
|
||||
case 4:
|
||||
return percentile_table_5x4;
|
||||
case 5:
|
||||
return percentile_table_5x5;
|
||||
case 6:
|
||||
return percentile_table_5x6;
|
||||
case 8:
|
||||
return percentile_table_5x8;
|
||||
case 10:
|
||||
return percentile_table_5x10;
|
||||
case 12:
|
||||
return percentile_table_5x12;
|
||||
}
|
||||
break;
|
||||
|
||||
case 6:
|
||||
switch (blockdim_y)
|
||||
{
|
||||
case 4:
|
||||
return percentile_table_6x4;
|
||||
case 5:
|
||||
return percentile_table_6x5;
|
||||
case 6:
|
||||
return percentile_table_6x6;
|
||||
case 8:
|
||||
return percentile_table_6x8;
|
||||
case 10:
|
||||
return percentile_table_6x10;
|
||||
case 12:
|
||||
return percentile_table_6x12;
|
||||
}
|
||||
break;
|
||||
|
||||
case 8:
|
||||
switch (blockdim_y)
|
||||
{
|
||||
case 4:
|
||||
return percentile_table_8x4;
|
||||
case 5:
|
||||
return percentile_table_8x5;
|
||||
case 6:
|
||||
return percentile_table_8x6;
|
||||
case 8:
|
||||
return percentile_table_8x8;
|
||||
case 10:
|
||||
return percentile_table_8x10;
|
||||
case 12:
|
||||
return percentile_table_8x12;
|
||||
}
|
||||
break;
|
||||
|
||||
case 10:
|
||||
switch (blockdim_y)
|
||||
{
|
||||
case 4:
|
||||
return percentile_table_10x4;
|
||||
case 5:
|
||||
return percentile_table_10x5;
|
||||
case 6:
|
||||
return percentile_table_10x6;
|
||||
case 8:
|
||||
return percentile_table_10x8;
|
||||
case 10:
|
||||
return percentile_table_10x10;
|
||||
case 12:
|
||||
return percentile_table_10x12;
|
||||
}
|
||||
break;
|
||||
|
||||
case 12:
|
||||
switch (blockdim_y)
|
||||
{
|
||||
case 4:
|
||||
return percentile_table_12x4;
|
||||
case 5:
|
||||
return percentile_table_12x5;
|
||||
case 6:
|
||||
return percentile_table_12x6;
|
||||
case 8:
|
||||
return percentile_table_12x8;
|
||||
case 10:
|
||||
return percentile_table_12x10;
|
||||
case 12:
|
||||
return percentile_table_12x12;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL; // should never happen.
|
||||
}
|
||||
|
||||
// Percentile table lookup for 3D block footprints.
// Stubbed for the time being: every footprint gets the same all-zero table.
static const float dummy_percentile_table_3d[2048] = { 0 };

const float *get_3d_percentile_table(int blockdim_x, int blockdim_y, int blockdim_z)
{
	// The dimensions are deliberately unused while this is a stub.
	(void)blockdim_x;
	(void)blockdim_y;
	(void)blockdim_z;

	const float *table = dummy_percentile_table_3d;
	return table;
}
|
||||
|
||||
|
||||
|
||||
// return 0 on invalid mode, 1 on valid mode.
|
||||
static int decode_block_mode_2d(int blockmode, int *Nval, int *Mval, int *dual_weight_plane, int *quant_mode)
|
||||
{
|
||||
int base_quant_mode = (blockmode >> 4) & 1;
|
||||
int H = (blockmode >> 9) & 1;
|
||||
int D = (blockmode >> 10) & 1;
|
||||
|
||||
int A = (blockmode >> 5) & 0x3;
|
||||
|
||||
int N = 0, M = 0;
|
||||
|
||||
if ((blockmode & 3) != 0)
|
||||
{
|
||||
base_quant_mode |= (blockmode & 3) << 1;
|
||||
int B = (blockmode >> 7) & 3;
|
||||
switch ((blockmode >> 2) & 3)
|
||||
{
|
||||
case 0:
|
||||
N = B + 4;
|
||||
M = A + 2;
|
||||
break;
|
||||
case 1:
|
||||
N = B + 8;
|
||||
M = A + 2;
|
||||
break;
|
||||
case 2:
|
||||
N = A + 2;
|
||||
M = B + 8;
|
||||
break;
|
||||
case 3:
|
||||
B &= 1;
|
||||
if (blockmode & 0x100)
|
||||
{
|
||||
N = B + 2;
|
||||
M = A + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
N = A + 2;
|
||||
M = B + 6;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
base_quant_mode |= ((blockmode >> 2) & 3) << 1;
|
||||
if (((blockmode >> 2) & 3) == 0)
|
||||
return 0;
|
||||
int B = (blockmode >> 9) & 3;
|
||||
switch ((blockmode >> 7) & 3)
|
||||
{
|
||||
case 0:
|
||||
N = 12;
|
||||
M = A + 2;
|
||||
break;
|
||||
case 1:
|
||||
N = A + 2;
|
||||
M = 12;
|
||||
break;
|
||||
case 2:
|
||||
N = A + 6;
|
||||
M = B + 6;
|
||||
D = 0;
|
||||
H = 0;
|
||||
break;
|
||||
case 3:
|
||||
switch ((blockmode >> 5) & 3)
|
||||
{
|
||||
case 0:
|
||||
N = 6;
|
||||
M = 10;
|
||||
break;
|
||||
case 1:
|
||||
N = 10;
|
||||
M = 6;
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int weight_count = N * M * (D + 1);
|
||||
int qmode = (base_quant_mode - 2) + 6 * H;
|
||||
|
||||
int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);
|
||||
if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
|
||||
return 0;
|
||||
|
||||
*Nval = N;
|
||||
*Mval = M;
|
||||
*dual_weight_plane = D;
|
||||
*quant_mode = qmode;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static int decode_block_mode_3d(int blockmode, int *Nval, int *Mval, int *Qval, int *dual_weight_plane, int *quant_mode)
|
||||
{
|
||||
int base_quant_mode = (blockmode >> 4) & 1;
|
||||
int H = (blockmode >> 9) & 1;
|
||||
int D = (blockmode >> 10) & 1;
|
||||
|
||||
int A = (blockmode >> 5) & 0x3;
|
||||
|
||||
int N = 0, M = 0, Q = 0;
|
||||
|
||||
if ((blockmode & 3) != 0)
|
||||
{
|
||||
base_quant_mode |= (blockmode & 3) << 1;
|
||||
int B = (blockmode >> 7) & 3;
|
||||
int C = (blockmode >> 2) & 0x3;
|
||||
N = A + 2;
|
||||
M = B + 2;
|
||||
Q = C + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
base_quant_mode |= ((blockmode >> 2) & 3) << 1;
|
||||
if (((blockmode >> 2) & 3) == 0)
|
||||
return 0;
|
||||
int B = (blockmode >> 9) & 3;
|
||||
if (((blockmode >> 7) & 3) != 3)
|
||||
{
|
||||
D = 0;
|
||||
H = 0;
|
||||
}
|
||||
switch ((blockmode >> 7) & 3)
|
||||
{
|
||||
case 0:
|
||||
N = 6;
|
||||
M = B + 2;
|
||||
Q = A + 2;
|
||||
break;
|
||||
case 1:
|
||||
N = A + 2;
|
||||
M = 6;
|
||||
Q = B + 2;
|
||||
break;
|
||||
case 2:
|
||||
N = A + 2;
|
||||
M = B + 2;
|
||||
Q = 6;
|
||||
break;
|
||||
case 3:
|
||||
N = 2;
|
||||
M = 2;
|
||||
Q = 2;
|
||||
switch ((blockmode >> 5) & 3)
|
||||
{
|
||||
case 0:
|
||||
N = 6;
|
||||
break;
|
||||
case 1:
|
||||
M = 6;
|
||||
break;
|
||||
case 2:
|
||||
Q = 6;
|
||||
break;
|
||||
case 3:
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int weight_count = N * M * Q * (D + 1);
|
||||
int qmode = (base_quant_mode - 2) + 6 * H;
|
||||
|
||||
int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);
|
||||
if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
|
||||
return 0;
|
||||
|
||||
*Nval = N;
|
||||
*Mval = M;
|
||||
*Qval = Q;
|
||||
*dual_weight_plane = D;
|
||||
*quant_mode = qmode;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static void initialize_decimation_table_2d(
|
||||
// dimensions of the block
|
||||
int xdim, int ydim,
|
||||
// number of grid points in 2d weight grid
|
||||
int x_weights, int y_weights, decimation_table * dt)
|
||||
{
|
||||
int i, j;
|
||||
int x, y;
|
||||
|
||||
int texels_per_block = xdim * ydim;
|
||||
int weights_per_block = x_weights * y_weights;
|
||||
|
||||
int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
|
||||
int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
||||
int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
||||
|
||||
int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
|
||||
int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
||||
int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
for (i = 0; i < weights_per_block; i++)
|
||||
texelcount_of_weight[i] = 0;
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
weightcount_of_texel[i] = 0;
|
||||
|
||||
for (y = 0; y < ydim; y++)
|
||||
for (x = 0; x < xdim; x++)
|
||||
{
|
||||
int texel = y * xdim + x;
|
||||
|
||||
int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
|
||||
int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
|
||||
|
||||
int x_weight_frac = x_weight & 0xF;
|
||||
int y_weight_frac = y_weight & 0xF;
|
||||
int x_weight_int = x_weight >> 4;
|
||||
int y_weight_int = y_weight >> 4;
|
||||
int qweight[4];
|
||||
int weight[4];
|
||||
qweight[0] = x_weight_int + y_weight_int * x_weights;
|
||||
qweight[1] = qweight[0] + 1;
|
||||
qweight[2] = qweight[0] + x_weights;
|
||||
qweight[3] = qweight[2] + 1;
|
||||
|
||||
// truncated-precision bilinear interpolation.
|
||||
int prod = x_weight_frac * y_weight_frac;
|
||||
|
||||
weight[3] = (prod + 8) >> 4;
|
||||
weight[1] = x_weight_frac - weight[3];
|
||||
weight[2] = y_weight_frac - weight[3];
|
||||
weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (weight[i] != 0)
|
||||
{
|
||||
grid_weights_of_texel[texel][weightcount_of_texel[texel]] = qweight[i];
|
||||
weights_of_texel[texel][weightcount_of_texel[texel]] = weight[i];
|
||||
weightcount_of_texel[texel]++;
|
||||
texels_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = texel;
|
||||
texelweights_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = weight[i];
|
||||
texelcount_of_weight[qweight[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
dt->texel_num_weights[i] = weightcount_of_texel[i];
|
||||
|
||||
// ensure that all 4 entries are actually initialized.
|
||||
// This allows a branch-free implementation of compute_value_of_texel_flt()
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
dt->texel_weights_int[i][j] = 0;
|
||||
dt->texel_weights_float[i][j] = 0.0f;
|
||||
dt->texel_weights[i][j] = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j < weightcount_of_texel[i]; j++)
|
||||
{
|
||||
dt->texel_weights_int[i][j] = weights_of_texel[i][j];
|
||||
dt->texel_weights_float[i][j] = static_cast < float >(weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM);
|
||||
dt->texel_weights[i][j] = grid_weights_of_texel[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < weights_per_block; i++)
|
||||
{
|
||||
dt->weight_num_texels[i] = texelcount_of_weight[i];
|
||||
|
||||
|
||||
for (j = 0; j < texelcount_of_weight[i]; j++)
|
||||
{
|
||||
dt->weight_texel[i][j] = texels_of_weight[i][j];
|
||||
dt->weights_int[i][j] = texelweights_of_weight[i][j];
|
||||
dt->weights_flt[i][j] = static_cast < float >(texelweights_of_weight[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
dt->num_texels = texels_per_block;
|
||||
dt->num_weights = weights_per_block;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static void initialize_decimation_table_3d(
|
||||
// dimensions of the block
|
||||
int xdim, int ydim, int zdim,
|
||||
// number of grid points in 3d weight grid
|
||||
int x_weights, int y_weights, int z_weights, decimation_table * dt)
|
||||
{
|
||||
int i, j;
|
||||
int x, y, z;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
int weights_per_block = x_weights * y_weights * z_weights;
|
||||
|
||||
int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
|
||||
int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
||||
int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
||||
|
||||
int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
|
||||
int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
||||
int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
for (i = 0; i < weights_per_block; i++)
|
||||
texelcount_of_weight[i] = 0;
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
weightcount_of_texel[i] = 0;
|
||||
|
||||
for (z = 0; z < zdim; z++)
|
||||
for (y = 0; y < ydim; y++)
|
||||
for (x = 0; x < xdim; x++)
|
||||
{
|
||||
int texel = (z * ydim + y) * xdim + x;
|
||||
|
||||
int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
|
||||
int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
|
||||
int z_weight = (((1024 + zdim / 2) / (zdim - 1)) * z * (z_weights - 1) + 32) >> 6;
|
||||
|
||||
int x_weight_frac = x_weight & 0xF;
|
||||
int y_weight_frac = y_weight & 0xF;
|
||||
int z_weight_frac = z_weight & 0xF;
|
||||
int x_weight_int = x_weight >> 4;
|
||||
int y_weight_int = y_weight >> 4;
|
||||
int z_weight_int = z_weight >> 4;
|
||||
int qweight[4];
|
||||
int weight[4];
|
||||
qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
|
||||
qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
|
||||
|
||||
// simplex interpolation
|
||||
int fs = x_weight_frac;
|
||||
int ft = y_weight_frac;
|
||||
int fp = z_weight_frac;
|
||||
|
||||
int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
|
||||
int N = x_weights;
|
||||
int NM = x_weights * y_weights;
|
||||
|
||||
int s1, s2, w0, w1, w2, w3;
|
||||
switch (cas)
|
||||
{
|
||||
case 7:
|
||||
s1 = 1;
|
||||
s2 = N;
|
||||
w0 = 16 - fs;
|
||||
w1 = fs - ft;
|
||||
w2 = ft - fp;
|
||||
w3 = fp;
|
||||
break;
|
||||
case 3:
|
||||
s1 = N;
|
||||
s2 = 1;
|
||||
w0 = 16 - ft;
|
||||
w1 = ft - fs;
|
||||
w2 = fs - fp;
|
||||
w3 = fp;
|
||||
break;
|
||||
case 5:
|
||||
s1 = 1;
|
||||
s2 = NM;
|
||||
w0 = 16 - fs;
|
||||
w1 = fs - fp;
|
||||
w2 = fp - ft;
|
||||
w3 = ft;
|
||||
break;
|
||||
case 4:
|
||||
s1 = NM;
|
||||
s2 = 1;
|
||||
w0 = 16 - fp;
|
||||
w1 = fp - fs;
|
||||
w2 = fs - ft;
|
||||
w3 = ft;
|
||||
break;
|
||||
case 2:
|
||||
s1 = N;
|
||||
s2 = NM;
|
||||
w0 = 16 - ft;
|
||||
w1 = ft - fp;
|
||||
w2 = fp - fs;
|
||||
w3 = fs;
|
||||
break;
|
||||
case 0:
|
||||
s1 = NM;
|
||||
s2 = N;
|
||||
w0 = 16 - fp;
|
||||
w1 = fp - ft;
|
||||
w2 = ft - fs;
|
||||
w3 = fs;
|
||||
break;
|
||||
|
||||
default:
|
||||
s1 = NM;
|
||||
s2 = N;
|
||||
w0 = 16 - fp;
|
||||
w1 = fp - ft;
|
||||
w2 = ft - fs;
|
||||
w3 = fs;
|
||||
break;
|
||||
}
|
||||
|
||||
qweight[1] = qweight[0] + s1;
|
||||
qweight[2] = qweight[1] + s2;
|
||||
weight[0] = w0;
|
||||
weight[1] = w1;
|
||||
weight[2] = w2;
|
||||
weight[3] = w3;
|
||||
|
||||
/*
|
||||
for(i=0;i<4;i++) weight[i] <<= 4; */
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (weight[i] != 0)
|
||||
{
|
||||
grid_weights_of_texel[texel][weightcount_of_texel[texel]] = qweight[i];
|
||||
weights_of_texel[texel][weightcount_of_texel[texel]] = weight[i];
|
||||
weightcount_of_texel[texel]++;
|
||||
texels_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = texel;
|
||||
texelweights_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = weight[i];
|
||||
texelcount_of_weight[qweight[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
dt->texel_num_weights[i] = weightcount_of_texel[i];
|
||||
|
||||
// ensure that all 4 entries are actually initialized.
|
||||
// This allows a branch-free implementation of compute_value_of_texel_flt()
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
dt->texel_weights_int[i][j] = 0;
|
||||
dt->texel_weights_float[i][j] = 0.0f;
|
||||
dt->texel_weights[i][j] = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j < weightcount_of_texel[i]; j++)
|
||||
{
|
||||
dt->texel_weights_int[i][j] = weights_of_texel[i][j];
|
||||
dt->texel_weights_float[i][j] = static_cast < float >(weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM);
|
||||
dt->texel_weights[i][j] = grid_weights_of_texel[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < weights_per_block; i++)
|
||||
{
|
||||
dt->weight_num_texels[i] = texelcount_of_weight[i];
|
||||
for (j = 0; j < texelcount_of_weight[i]; j++)
|
||||
{
|
||||
dt->weight_texel[i][j] = texels_of_weight[i][j];
|
||||
dt->weights_int[i][j] = texelweights_of_weight[i][j];
|
||||
dt->weights_flt[i][j] = static_cast < float >(texelweights_of_weight[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
dt->num_texels = texels_per_block;
|
||||
dt->num_weights = weights_per_block;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void construct_block_size_descriptor_2d(int xdim, int ydim, block_size_descriptor * bsd)
|
||||
{
|
||||
int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index
|
||||
int decimation_mode_count = 0;
|
||||
|
||||
int i;
|
||||
int x_weights;
|
||||
int y_weights;
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
decimation_mode_index[i] = -1;
|
||||
}
|
||||
|
||||
// gather all the infill-modes that can be used with the current block size
|
||||
for (x_weights = 2; x_weights <= 12; x_weights++)
|
||||
for (y_weights = 2; y_weights <= 12; y_weights++)
|
||||
{
|
||||
if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
|
||||
continue;
|
||||
decimation_table *dt = new decimation_table;
|
||||
decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count;
|
||||
initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, dt);
|
||||
|
||||
int weight_count = x_weights * y_weights;
|
||||
|
||||
int maxprec_1plane = -1;
|
||||
int maxprec_2planes = -1;
|
||||
for (i = 0; i < 12; i++)
|
||||
{
|
||||
int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i);
|
||||
int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i);
|
||||
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
|
||||
maxprec_1plane = i;
|
||||
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
|
||||
maxprec_2planes = i;
|
||||
}
|
||||
|
||||
if (2 * x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
|
||||
maxprec_2planes = -1;
|
||||
|
||||
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim);
|
||||
|
||||
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
|
||||
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
|
||||
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
|
||||
bsd->decimation_tables[decimation_mode_count] = dt;
|
||||
|
||||
decimation_mode_count++;
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_DECIMATION_MODES; i++)
|
||||
{
|
||||
bsd->decimation_mode_percentile[i] = 1.0f;
|
||||
}
|
||||
|
||||
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++)
|
||||
{
|
||||
bsd->permit_encode[i] = 0;
|
||||
bsd->decimation_mode_samples[i] = 0;
|
||||
bsd->decimation_mode_maxprec_1plane[i] = -1;
|
||||
bsd->decimation_mode_maxprec_2planes[i] = -1;
|
||||
}
|
||||
|
||||
bsd->decimation_mode_count = decimation_mode_count;
|
||||
|
||||
const float *percentiles = get_2d_percentile_table(xdim, ydim);
|
||||
|
||||
// then construct the list of block formats
|
||||
for (i = 0; i < 2048; i++)
|
||||
{
|
||||
int x_weights, y_weights;
|
||||
int is_dual_plane;
|
||||
int quantization_mode;
|
||||
int fail = 0;
|
||||
int permit_encode = 1;
|
||||
|
||||
if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode))
|
||||
{
|
||||
if (x_weights > xdim || y_weights > ydim)
|
||||
permit_encode = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
fail = 1;
|
||||
permit_encode = 0;
|
||||
}
|
||||
|
||||
if (fail)
|
||||
{
|
||||
bsd->block_modes[i].decimation_mode = -1;
|
||||
bsd->block_modes[i].quantization_mode = -1;
|
||||
bsd->block_modes[i].is_dual_plane = -1;
|
||||
bsd->block_modes[i].permit_encode = 0;
|
||||
bsd->block_modes[i].permit_decode = 0;
|
||||
bsd->block_modes[i].percentile = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
|
||||
bsd->block_modes[i].decimation_mode = decimation_mode;
|
||||
bsd->block_modes[i].quantization_mode = quantization_mode;
|
||||
bsd->block_modes[i].is_dual_plane = is_dual_plane;
|
||||
bsd->block_modes[i].permit_encode = permit_encode;
|
||||
bsd->block_modes[i].permit_decode = permit_encode; // disallow decode of grid size larger than block size.
|
||||
bsd->block_modes[i].percentile = percentiles[i];
|
||||
|
||||
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
|
||||
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (xdim * ydim <= 64)
|
||||
{
|
||||
bsd->texelcount_for_bitmap_partitioning = xdim * ydim;
|
||||
for (i = 0; i < xdim * ydim; i++)
|
||||
bsd->texels_for_bitmap_partitioning[i] = i;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
// pick 64 random texels for use with bitmap partitioning.
|
||||
int arr[MAX_TEXELS_PER_BLOCK];
|
||||
for (i = 0; i < xdim * ydim; i++)
|
||||
arr[i] = 0;
|
||||
int arr_elements_set = 0;
|
||||
while (arr_elements_set < 64)
|
||||
{
|
||||
int idx = rand() % (xdim * ydim);
|
||||
if (arr[idx] == 0)
|
||||
{
|
||||
arr_elements_set++;
|
||||
arr[idx] = 1;
|
||||
}
|
||||
}
|
||||
int texel_weights_written = 0;
|
||||
int idx = 0;
|
||||
while (texel_weights_written < 64)
|
||||
{
|
||||
if (arr[idx])
|
||||
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
|
||||
idx++;
|
||||
}
|
||||
bsd->texelcount_for_bitmap_partitioning = 64;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void construct_block_size_descriptor_3d(int xdim, int ydim, int zdim, block_size_descriptor * bsd)
|
||||
{
|
||||
int decimation_mode_index[512]; // for each of the 512 entries in the decim_table_array, its index
|
||||
int decimation_mode_count = 0;
|
||||
|
||||
int i;
|
||||
int x_weights;
|
||||
int y_weights;
|
||||
int z_weights;
|
||||
|
||||
for (i = 0; i < 512; i++)
|
||||
{
|
||||
decimation_mode_index[i] = -1;
|
||||
}
|
||||
|
||||
// gather all the infill-modes that can be used with the current block size
|
||||
for (x_weights = 2; x_weights <= 6; x_weights++)
|
||||
for (y_weights = 2; y_weights <= 6; y_weights++)
|
||||
for (z_weights = 2; z_weights <= 6; z_weights++)
|
||||
{
|
||||
if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
|
||||
continue;
|
||||
decimation_table *dt = new decimation_table;
|
||||
decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
|
||||
initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt);
|
||||
|
||||
int weight_count = x_weights * y_weights * z_weights;
|
||||
|
||||
int maxprec_1plane = -1;
|
||||
int maxprec_2planes = -1;
|
||||
for (i = 0; i < 12; i++)
|
||||
{
|
||||
int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i);
|
||||
int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i);
|
||||
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
|
||||
maxprec_1plane = i;
|
||||
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
|
||||
maxprec_2planes = i;
|
||||
}
|
||||
|
||||
if ((2 * x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
|
||||
maxprec_2planes = -1;
|
||||
|
||||
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim && z_weights <= zdim);
|
||||
|
||||
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
|
||||
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
|
||||
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
|
||||
bsd->decimation_tables[decimation_mode_count] = dt;
|
||||
|
||||
decimation_mode_count++;
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_DECIMATION_MODES; i++)
|
||||
{
|
||||
bsd->decimation_mode_percentile[i] = 1.0f;
|
||||
}
|
||||
|
||||
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++)
|
||||
{
|
||||
bsd->permit_encode[i] = 0;
|
||||
bsd->decimation_mode_samples[i] = 0;
|
||||
bsd->decimation_mode_maxprec_1plane[i] = -1;
|
||||
bsd->decimation_mode_maxprec_2planes[i] = -1;
|
||||
}
|
||||
|
||||
bsd->decimation_mode_count = decimation_mode_count;
|
||||
|
||||
const float *percentiles = get_3d_percentile_table(xdim, ydim, zdim);
|
||||
|
||||
// then construct the list of block formats
|
||||
for (i = 0; i < 2048; i++)
|
||||
{
|
||||
int x_weights, y_weights, z_weights;
|
||||
int is_dual_plane;
|
||||
int quantization_mode;
|
||||
int fail = 0;
|
||||
int permit_encode = 1;
|
||||
|
||||
if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode))
|
||||
{
|
||||
if (x_weights > xdim || y_weights > ydim || z_weights > zdim)
|
||||
permit_encode = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
fail = 1;
|
||||
permit_encode = 0;
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
bsd->block_modes[i].decimation_mode = -1;
|
||||
bsd->block_modes[i].quantization_mode = -1;
|
||||
bsd->block_modes[i].is_dual_plane = -1;
|
||||
bsd->block_modes[i].permit_encode = 0;
|
||||
bsd->block_modes[i].permit_decode = 0;
|
||||
bsd->block_modes[i].percentile = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
|
||||
bsd->block_modes[i].decimation_mode = decimation_mode;
|
||||
bsd->block_modes[i].quantization_mode = quantization_mode;
|
||||
bsd->block_modes[i].is_dual_plane = is_dual_plane;
|
||||
bsd->block_modes[i].permit_encode = permit_encode;
|
||||
bsd->block_modes[i].permit_decode = permit_encode;
|
||||
bsd->block_modes[i].percentile = percentiles[i];
|
||||
|
||||
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
|
||||
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (xdim * ydim * zdim <= 64)
|
||||
{
|
||||
bsd->texelcount_for_bitmap_partitioning = xdim * ydim * zdim;
|
||||
for (i = 0; i < xdim * ydim * zdim; i++)
|
||||
bsd->texels_for_bitmap_partitioning[i] = i;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
// pick 64 random texels for use with bitmap partitioning.
|
||||
int arr[MAX_TEXELS_PER_BLOCK];
|
||||
for (i = 0; i < xdim * ydim * zdim; i++)
|
||||
arr[i] = 0;
|
||||
int arr_elements_set = 0;
|
||||
while (arr_elements_set < 64)
|
||||
{
|
||||
int idx = rand() % (xdim * ydim * zdim);
|
||||
if (arr[idx] == 0)
|
||||
{
|
||||
arr_elements_set++;
|
||||
arr[idx] = 1;
|
||||
}
|
||||
}
|
||||
int texel_weights_written = 0;
|
||||
int idx = 0;
|
||||
while (texel_weights_written < 64)
|
||||
{
|
||||
if (arr[idx])
|
||||
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
|
||||
idx++;
|
||||
}
|
||||
bsd->texelcount_for_bitmap_partitioning = 64;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// cache of block size descriptors, indexed by xdim + (ydim << 4) + (zdim << 8).
// Entries start out NULL (static storage) and are filled in on first request.
static block_size_descriptor *bsd_pointers[4096];

// function to obtain a block size descriptor. If the descriptor does not exist,
// it is created as needed. Should not be called from within multi-threaded code.
//
// A 3D descriptor is built when zdim > 1, a 2D one otherwise. The descriptor is
// allocated with new and kept in the cache; this function never frees it.
// The dimensions are not range-checked: each must fit in 4 bits for the packed
// index to stay inside the cache array.
const block_size_descriptor *get_block_size_descriptor(int xdim, int ydim, int zdim)
{
	int bsd_index = xdim + (ydim << 4) + (zdim << 8);
	if (bsd_pointers[bsd_index] == NULL)
	{
		block_size_descriptor *bsd = new block_size_descriptor;
		if (zdim > 1)
			construct_block_size_descriptor_3d(xdim, ydim, zdim, bsd);
		else
			construct_block_size_descriptor_2d(xdim, ydim, bsd);

		bsd_pointers[bsd_index] = bsd;
	}
	return bsd_pointers[bsd_index];
}
|
||||
815
3rdparty/astc/astc_codec_internals.h
vendored
Normal file
815
3rdparty/astc/astc_codec_internals.h
vendored
Normal file
@@ -0,0 +1,815 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012, 2018 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Internal function and data declarations for ASTC codec.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef ASTC_CODEC_INTERNALS_INCLUDED
|
||||
|
||||
#define ASTC_CODEC_INTERNALS_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include "mathlib.h"
|
||||
|
||||
// Minimum / maximum of two values. Defined only if not already provided.
// Both are plain macros: each argument may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MIN
	#define MIN(x,y) ((x)<(y)?(x):(y))
#endif

#ifndef MAX
	#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
|
||||
|
||||
// Macro to silence warnings on ignored parameters.
// The presence of this macro should be a signal to look at refactoring.
// It takes the address of the parameter and discards it, so the argument must be an lvalue.
#define IGNORE(param) ((void)&param)
|
||||
|
||||
// True when p compares unequal to itself, which for floating-point values means p is a NaN.
// The argument is evaluated twice.
#define astc_isnan(p) ((p)!=(p))
|
||||
|
||||
// ASTC parameters
#define MAX_TEXELS_PER_BLOCK 216		// sizes every per-texel array in this header
#define MAX_WEIGHTS_PER_BLOCK 64		// sizes every per-weight array in this header
#define MIN_WEIGHT_BITS_PER_BLOCK 24
#define MAX_WEIGHT_BITS_PER_BLOCK 96
#define PARTITION_BITS 10
#define PARTITION_COUNT (1 << PARTITION_BITS)

// the sum of weights for one texel.
#define TEXEL_WEIGHT_SUM 16
#define MAX_DECIMATION_MODES 87			// sizes the per-decimation-mode arrays of block_size_descriptor
#define MAX_WEIGHT_MODES 2048			// sizes block_size_descriptor::block_modes
|
||||
|
||||
// error reporting for codec internal errors.
// The macro expands to a call that passes the current source file and line.
#define ASTC_CODEC_INTERNAL_ERROR astc_codec_internal_error(__FILE__, __LINE__)

void astc_codec_internal_error(const char *filename, int linenumber);

// uncomment this macro to enable checking for inappropriate NaNs;
// works on Linux only, and slows down encoding significantly.
// #define DEBUG_CAPTURE_NAN

// the PRINT_DIAGNOSTICS macro enables the -diag command line switch,
// which can be used to look for codec bugs
#define DEBUG_PRINT_DIAGNOSTICS

#ifdef DEBUG_PRINT_DIAGNOSTICS
	extern int print_diagnostics;
#endif

// global flags; declared here, defined elsewhere.
extern int print_tile_errors;
extern int print_statistics;

extern int perform_srgb_transform;
extern int rgb_force_use_of_hdr;
extern int alpha_force_use_of_hdr;
|
||||
|
||||
struct processed_line2
|
||||
{
|
||||
float2 amod;
|
||||
float2 bs;
|
||||
float2 bis;
|
||||
};
|
||||
struct processed_line3
|
||||
{
|
||||
float3 amod;
|
||||
float3 bs;
|
||||
float3 bis;
|
||||
};
|
||||
struct processed_line4
|
||||
{
|
||||
float4 amod;
|
||||
float4 bs;
|
||||
float4 bis;
|
||||
};
|
||||
|
||||
// Decode modes. The enumerators take the implicit values 0, 1 and 2 in declaration order.
enum astc_decode_mode
{
	DECODE_LDR_SRGB,
	DECODE_LDR,
	DECODE_HDR
};
|
||||
|
||||
|
||||
/*
|
||||
Partition table representation:
|
||||
For each block size, we have 3 tables, each with 1024 partitionings;
|
||||
these three tables correspond to 2, 3 and 4 partitions respectively.
|
||||
For each partitioning, we have:
|
||||
* a 4-entry table indicating how many texels there are in each of the 4 partitions.
|
||||
This may be from 0 to a very large value.
|
||||
* a table indicating the partition index of each of the texels in the block.
|
||||
Each index may be 0, 1, 2 or 3.
|
||||
* Each element in the table is an uint8_t indicating partition index (0, 1, 2 or 3)
|
||||
*/
|
||||
|
||||
struct partition_info
|
||||
{
|
||||
int partition_count;
|
||||
uint8_t texels_per_partition[4];
|
||||
uint8_t partition_of_texel[MAX_TEXELS_PER_BLOCK];
|
||||
uint8_t texels_of_partition[4][MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
uint64_t coverage_bitmaps[4]; // used for the purposes of k-means partition search.
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
In ASTC, we don't necessarily provide a weight for every texel.
|
||||
As such, for each block size, there are a number of patterns where some texels
|
||||
have their weights computed as a weighted average of more than 1 weight.
|
||||
As such, the codec uses a data structure that tells us: for each texel, which
|
||||
weights it is a combination of for each weight, which texels it contributes to.
|
||||
The decimation_table is this data structure.
|
||||
*/
|
||||
struct decimation_table
|
||||
{
|
||||
int num_texels;
|
||||
int num_weights;
|
||||
uint8_t texel_num_weights[MAX_TEXELS_PER_BLOCK]; // number of indices that go into the calculation for a texel
|
||||
uint8_t texel_weights_int[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight
|
||||
float texel_weights_float[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight
|
||||
uint8_t texel_weights[MAX_TEXELS_PER_BLOCK][4]; // the weights that go into a texel calculation
|
||||
uint8_t weight_num_texels[MAX_WEIGHTS_PER_BLOCK]; // the number of texels that a given weight contributes to
|
||||
uint8_t weight_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the texels that the weight contributes to
|
||||
uint8_t weights_int[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the weights that the weight contributes to a texel.
|
||||
float weights_flt[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the weights that the weight contributes to a texel.
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
	data structure describing information that pertains to a block size and its associated block modes.
*/
struct block_mode
{
	int8_t decimation_mode;		// index of the decimation mode used by this block mode; -1 if the mode does not decode
	int8_t quantization_mode;	// weight quantization mode; -1 if the mode does not decode
	int8_t is_dual_plane;		// 1 if two weight planes are used, 0 if one; -1 if the mode does not decode
	int8_t permit_encode;		// non-zero if the encoder may use this mode
	int8_t permit_decode;		// non-zero if the decoder accepts this mode
	float percentile;
};
|
||||
|
||||
|
||||
struct block_size_descriptor
|
||||
{
|
||||
int decimation_mode_count;
|
||||
int decimation_mode_samples[MAX_DECIMATION_MODES];
|
||||
int decimation_mode_maxprec_1plane[MAX_DECIMATION_MODES];
|
||||
int decimation_mode_maxprec_2planes[MAX_DECIMATION_MODES];
|
||||
float decimation_mode_percentile[MAX_DECIMATION_MODES];
|
||||
int permit_encode[MAX_DECIMATION_MODES];
|
||||
const decimation_table *decimation_tables[MAX_DECIMATION_MODES + 1];
|
||||
block_mode block_modes[MAX_WEIGHT_MODES];
|
||||
|
||||
// for the k-means bed bitmap partitioning algorithm, we don't
|
||||
// want to consider more than 64 texels; this array specifies
|
||||
// which 64 texels (if that many) to consider.
|
||||
int texelcount_for_bitmap_partitioning;
|
||||
int texels_for_bitmap_partitioning[64];
|
||||
};
|
||||
|
||||
// data structure representing one block of an image.
|
||||
// it is expanded to float prior to processing to save some computation time
|
||||
// on conversions to/from uint8_t (this also allows us to handle HDR textures easily)
|
||||
struct imageblock
|
||||
{
|
||||
float orig_data[MAX_TEXELS_PER_BLOCK * 4]; // original input data
|
||||
float work_data[MAX_TEXELS_PER_BLOCK * 4]; // the data that we will compress, either linear or LNS (0..65535 in both cases)
|
||||
float deriv_data[MAX_TEXELS_PER_BLOCK * 4]; // derivative of the conversion function used, used to modify error weighting
|
||||
|
||||
uint8_t rgb_lns[MAX_TEXELS_PER_BLOCK]; // 1 if RGB data are being treated as LNS
|
||||
uint8_t alpha_lns[MAX_TEXELS_PER_BLOCK]; // 1 if Alpha data are being treated as LNS
|
||||
uint8_t nan_texel[MAX_TEXELS_PER_BLOCK]; // 1 if the texel is a NaN-texel.
|
||||
|
||||
float red_min, red_max;
|
||||
float green_min, green_max;
|
||||
float blue_min, blue_max;
|
||||
float alpha_min, alpha_max;
|
||||
int grayscale; // 1 if R=G=B for every pixel, 0 otherwise
|
||||
|
||||
int xpos, ypos, zpos;
|
||||
};
|
||||
|
||||
|
||||
struct error_weighting_params
|
||||
{
|
||||
float rgb_power;
|
||||
float rgb_base_weight;
|
||||
float rgb_mean_weight;
|
||||
float rgb_stdev_weight;
|
||||
float alpha_power;
|
||||
float alpha_base_weight;
|
||||
float alpha_mean_weight;
|
||||
float alpha_stdev_weight;
|
||||
float rgb_mean_and_stdev_mixing;
|
||||
int mean_stdev_radius;
|
||||
int enable_rgb_scale_with_alpha;
|
||||
int alpha_radius;
|
||||
int ra_normal_angular_scale;
|
||||
float block_artifact_suppression;
|
||||
float rgba_weights[4];
|
||||
|
||||
float block_artifact_suppression_expanded[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
// parameters that deal with heuristic codec speedups
|
||||
int partition_search_limit;
|
||||
float block_mode_cutoff;
|
||||
float texel_avg_error_limit;
|
||||
float partition_1_to_2_limit;
|
||||
float lowest_correlation_cutoff;
|
||||
int max_refinement_iters;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
void update_imageblock_flags(imageblock * pb, int xdim, int ydim, int zdim);
|
||||
|
||||
|
||||
void imageblock_initialize_orig_from_work(imageblock * pb, int pixelcount);
|
||||
|
||||
|
||||
void imageblock_initialize_work_from_orig(imageblock * pb, int pixelcount);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Data structure representing error weighting for one block of an image. this is used as
|
||||
a multiplier for the error weight to apply to each color component when computing PSNR.
|
||||
|
||||
This weighting has several uses: it's usable for RA, GA, BA, A weighting, which is useful
|
||||
for alpha-textures it's usable for HDR textures, where weighting should be approximately inverse to
|
||||
luminance it's usable for perceptual weighting, where we assign higher weight to low-variability
|
||||
regions than to high-variability regions. it's usable for suppressing off-edge block content in
|
||||
case the texture doesn't actually extend to the edge of the block.
|
||||
|
||||
For the default case (everything is evenly weighted), every weight is 1. For the RA,GA,BA,A case,
|
||||
we multiply the R,G,B weights with that of the alpha.
|
||||
|
||||
Putting the same weight in every component should result in the default case.
|
||||
The following relations should hold:
|
||||
|
||||
texel_weight_rg[i] = (texel_weight_r[i] + texel_weight_g[i]) / 2
|
||||
texel_weight_lum[i] = (texel_weight_r[i] + texel_weight_g[i] + texel_weight_b[i]) / 3
|
||||
texel_weight[i] = (texel_weight_r[i] + texel_weight_g[i] + texel_weight_b[i] + texel_weight_a[i] / 4
|
||||
*/
|
||||
|
||||
struct error_weight_block
|
||||
{
|
||||
float4 error_weights[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_gba[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_rba[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_rga[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_rgb[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
float texel_weight_rg[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_rb[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_gb[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_ra[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
float texel_weight_r[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_g[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_b[MAX_TEXELS_PER_BLOCK];
|
||||
float texel_weight_a[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
int contains_zeroweight_texels;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct error_weight_block_orig
|
||||
{
|
||||
float4 error_weights[MAX_TEXELS_PER_BLOCK];
|
||||
};
|
||||
|
||||
|
||||
// All quantization methods supported under this format. Each enumerator is
// named after a level count; the values run consecutively from 0 to 20.
enum quantization_method
{
	QUANT_2   = 0,  QUANT_3   = 1,  QUANT_4   = 2,
	QUANT_5   = 3,  QUANT_6   = 4,  QUANT_8   = 5,
	QUANT_10  = 6,  QUANT_12  = 7,  QUANT_16  = 8,
	QUANT_20  = 9,  QUANT_24  = 10, QUANT_32  = 11,
	QUANT_40  = 12, QUANT_48  = 13, QUANT_64  = 14,
	QUANT_80  = 15, QUANT_96  = 16, QUANT_128 = 17,
	QUANT_160 = 18, QUANT_192 = 19, QUANT_256 = 20
};
|
||||
|
||||
|
||||
/*
|
||||
In ASTC, we support relatively many combinations of weight precisions and weight transfer functions.
|
||||
As such, for each combination we support, we have a hardwired data structure.
|
||||
|
||||
This structure provides the following information: A table, used to estimate the closest quantized
|
||||
weight for a given floating-point weight. For each quantized weight, the corresponding unquantized
|
||||
and floating-point values. For each quantized weight, a previous-value and a next-value.
|
||||
*/
|
||||
|
||||
struct quantization_and_transfer_table
|
||||
{
|
||||
quantization_method method;
|
||||
uint8_t unquantized_value[32]; // 0..64
|
||||
float unquantized_value_flt[32]; // 0..1
|
||||
uint8_t prev_quantized_value[32];
|
||||
uint8_t next_quantized_value[32];
|
||||
uint8_t closest_quantized_weight[1025];
|
||||
};
|
||||
|
||||
extern const quantization_and_transfer_table quant_and_xfer_tables[12];
|
||||
|
||||
|
||||
|
||||
// Color endpoint formats, numbered consecutively from 0 to 15.
enum endpoint_formats
{
	// luminance formats
	FMT_LUMINANCE                 = 0,
	FMT_LUMINANCE_DELTA           = 1,
	FMT_HDR_LUMINANCE_LARGE_RANGE = 2,
	FMT_HDR_LUMINANCE_SMALL_RANGE = 3,

	// luminance + alpha formats
	FMT_LUMINANCE_ALPHA           = 4,
	FMT_LUMINANCE_ALPHA_DELTA     = 5,

	// RGB formats
	FMT_RGB_SCALE                 = 6,
	FMT_HDR_RGB_SCALE             = 7,
	FMT_RGB                       = 8,
	FMT_RGB_DELTA                 = 9,
	FMT_RGB_SCALE_ALPHA           = 10,
	FMT_HDR_RGB                   = 11,

	// RGBA formats
	FMT_RGBA                      = 12,
	FMT_RGBA_DELTA                = 13,
	FMT_HDR_RGB_LDR_ALPHA         = 14,
	FMT_HDR_RGBA                  = 15,
};
|
||||
|
||||
|
||||
|
||||
struct symbolic_compressed_block
|
||||
{
|
||||
int error_block; // 1 marks error block, 0 marks non-error-block.
|
||||
int block_mode; // 0 to 2047. Negative value marks constant-color block (-1: FP16, -2:UINT16)
|
||||
int partition_count; // 1 to 4; Zero marks a constant-color block.
|
||||
int partition_index; // 0 to 1023
|
||||
int color_formats[4]; // color format for each endpoint color pair.
|
||||
int color_formats_matched; // color format for all endpoint pairs are matched.
|
||||
int color_values[4][12]; // quantized endpoint color pairs.
|
||||
int color_quantization_level;
|
||||
uint8_t plane1_weights[MAX_WEIGHTS_PER_BLOCK]; // quantized and decimated weights
|
||||
uint8_t plane2_weights[MAX_WEIGHTS_PER_BLOCK];
|
||||
int plane2_color_component; // color component for the secondary plane of weights
|
||||
int constant_color[4]; // constant-color, as FP16 or UINT16. Used for constant-color blocks only.
|
||||
};
|
||||
|
||||
|
||||
// Bit-packed form of one compressed block: 16 raw bytes.
struct physical_compressed_block
{
	uint8_t data[16];
};
|
||||
|
||||
|
||||
|
||||
|
||||
const block_size_descriptor *get_block_size_descriptor(int xdim, int ydim, int zdim);
|
||||
|
||||
|
||||
// ***********************************************************
|
||||
// functions and data pertaining to quantization and encoding
|
||||
// **********************************************************
|
||||
extern const uint8_t color_quantization_tables[21][256];
|
||||
extern const uint8_t color_unquantization_tables[21][256];
|
||||
|
||||
void encode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset);
|
||||
|
||||
void decode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset);
|
||||
|
||||
int compute_ise_bitcount(int items, quantization_method quant);
|
||||
|
||||
void build_quantization_mode_table(void);
|
||||
extern int quantization_mode_table[17][128];
|
||||
|
||||
|
||||
// **********************************************
|
||||
// functions and data pertaining to partitioning
|
||||
// **********************************************
|
||||
|
||||
// function to get a pointer to a partition table or an array thereof.
|
||||
const partition_info *get_partition_table(int xdim, int ydim, int zdim, int partition_count);
|
||||
|
||||
|
||||
|
||||
|
||||
// functions to compute color averages and dominant directions
|
||||
// for each partition in a block
|
||||
|
||||
|
||||
void compute_averages_and_directions_rgb(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb,
|
||||
const float4 * color_scalefactors, float3 * averages, float3 * directions_rgb, float2 * directions_rg, float2 * directions_rb, float2 * directions_gb);
|
||||
|
||||
|
||||
|
||||
void compute_averages_and_directions_rgba(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb,
|
||||
const float4 * color_scalefactors,
|
||||
float4 * averages, float4 * directions_rgba, float3 * directions_gba, float3 * directions_rba, float3 * directions_rga, float3 * directions_rgb);
|
||||
|
||||
|
||||
void compute_averages_and_directions_3_components(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb,
|
||||
const float3 * color_scalefactors, int component1, int component2, int component3, float3 * averages, float3 * directions);
|
||||
|
||||
void compute_averages_and_directions_2_components(const partition_info * pt,
|
||||
const imageblock * blk,
|
||||
const error_weight_block * ewb, const float2 * color_scalefactors, int component1, int component2, float2 * averages, float2 * directions);
|
||||
|
||||
// functions to compute error value across a tile given a partitioning
|
||||
// (with the assumption that each partitioning has colors lying on a line where
|
||||
// they are represented with infinite precision). Also return the length of the line
|
||||
// segments that the partition's colors are actually projected onto.
|
||||
float compute_error_squared_gba(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
|
||||
// output: computed length of the partitioning's line. This is not part of the
|
||||
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
|
||||
float *length_of_lines);
|
||||
|
||||
float compute_error_squared_rba(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
|
||||
// output: computed length of the partitioning's line. This is not part of the
|
||||
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
|
||||
float *length_of_lines);
|
||||
|
||||
float compute_error_squared_rga(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
|
||||
// output: computed length of the partitioning's line. This is not part of the
|
||||
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
|
||||
float *length_of_lines);
|
||||
|
||||
float compute_error_squared_rgb(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * plines,
|
||||
// output: computed length of the partitioning's line. This is not part of the
|
||||
// error introduced by partitioning itself, but is used to estimate the error introduced by quantization
|
||||
float *length_of_lines);
|
||||
|
||||
|
||||
float compute_error_squared_rgba(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line4 * lines, // one line for each of the partitions. The lines are assumed to be normalized.
|
||||
float *length_of_lines);
|
||||
|
||||
float compute_error_squared_rg(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
|
||||
|
||||
float compute_error_squared_rb(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
|
||||
|
||||
float compute_error_squared_gb(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
|
||||
|
||||
float compute_error_squared_ra(const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line2 * plines, float *length_of_lines);
|
||||
|
||||
|
||||
// functions to compute error value across a tile for a particular line function
|
||||
// for a single partition.
|
||||
float compute_error_squared_rgb_single_partition(int partition_to_test, int xdim, int ydim, int zdim, const partition_info * pt, // the partition that we use when computing the squared-error.
|
||||
const imageblock * blk, const error_weight_block * ewb, const processed_line3 * lin // the line for the partition.
|
||||
);
|
||||
|
||||
|
||||
|
||||
// for each partition, compute its color weightings.
|
||||
void compute_partition_error_color_weightings(int xdim, int ydim, int zdim, const error_weight_block * ewb, const partition_info * pi, float4 error_weightings[4], float4 color_scalefactors[4]);
|
||||
|
||||
|
||||
|
||||
// function to find the best partitioning for a given block.
|
||||
|
||||
void find_best_partitionings(int partition_search_limit, int xdim, int ydim, int zdim, int partition_count, const imageblock * pb, const error_weight_block * ewb, int candidates_to_return,
|
||||
// best partitionings to use if the endpoint colors are assumed to be uncorrelated
|
||||
int *best_partitions_uncorrellated,
|
||||
// best partitionings to use if the endpoint colors have the same chroma
|
||||
int *best_partitions_samechroma,
|
||||
// best partitionings to use if dual plane of weights are present
|
||||
int *best_partitions_dual_weight_planes);
|
||||
|
||||
|
||||
// use k-means clustering to compute a partition ordering for a block.
|
||||
void kmeans_compute_partition_ordering(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, int *ordering);
|
||||
|
||||
|
||||
|
||||
|
||||
// *********************************************************
|
||||
// functions and data pertaining to images and imageblocks
|
||||
// *********************************************************
|
||||
|
||||
// An image as handled by the codec: two triple-indirected sample pointers
// (8-bit and 16-bit), the three image dimensions and a padding amount.
struct astc_codec_image
{
	// sample storage; NOTE(review): presumably only one of the two is populated
	// depending on the bitness passed to allocate_image() -- confirm.
	uint8_t ***imagedata8;
	uint16_t ***imagedata16;

	// image dimensions
	int xsize;
	int ysize;
	int zsize;

	int padding;
};
|
||||
|
||||
void destroy_image(astc_codec_image * img);
|
||||
astc_codec_image *allocate_image(int bitness, int xsize, int ysize, int zsize, int padding);
|
||||
void initialize_image(astc_codec_image * img);
|
||||
void fill_image_padding_area(astc_codec_image * img);
|
||||
|
||||
|
||||
extern float4 ***input_averages;
|
||||
extern float4 ***input_variances;
|
||||
extern float ***input_alpha_averages;
|
||||
|
||||
|
||||
// Selector for each of the four channels.
// Entry values: 0=red, 1=green, 2=blue, 3=alpha, 4=0.0, 5=1.0
struct swizzlepattern
{
	uint8_t r;
	uint8_t g;
	uint8_t b;
	uint8_t a;
};
|
||||
|
||||
|
||||
|
||||
int determine_image_channels(const astc_codec_image * img);
|
||||
|
||||
// function to compute regional averages and variances for an image
|
||||
void compute_averages_and_variances(const astc_codec_image * img, float rgb_power_to_use, float alpha_power_to_use, int avg_kernel_radius, int var_kernel_radius, swizzlepattern swz);
|
||||
|
||||
|
||||
/*
|
||||
Functions to load image from file.
|
||||
If successful, return an astc_codec_image object.
|
||||
If unsuccessful, returns NULL.
|
||||
|
||||
*result is used to return a result. In case of a successfully loaded image, bits[2:0]
|
||||
of *result indicate how many components are present, and bit[7] indicate whether
|
||||
the input image was LDR or HDR (0=LDR, 1=HDR).
|
||||
|
||||
In case of failure, *result is given a negative value.
|
||||
*/
|
||||
|
||||
|
||||
astc_codec_image *load_ktx_uncompressed_image(const char *filename, int padding, int *result);
|
||||
astc_codec_image *load_dds_uncompressed_image(const char *filename, int padding, int *result);
|
||||
astc_codec_image *load_tga_image(const char *tga_filename, int padding, int *result);
|
||||
astc_codec_image *load_image_with_stb(const char *filename, int padding, int *result);
|
||||
|
||||
astc_codec_image *astc_codec_load_image(const char *filename, int padding, int *result);
|
||||
int astc_codec_unlink(const char *filename);
|
||||
|
||||
// function to store image to file
|
||||
// If successful, returns the number of channels in input image
|
||||
// If unsuccessful, returns a negative number.
|
||||
int store_ktx_uncompressed_image(const astc_codec_image * img, const char *filename, int bitness);
|
||||
int store_dds_uncompressed_image(const astc_codec_image * img, const char *filename, int bitness);
|
||||
int store_tga_image(const astc_codec_image * img, const char *tga_filename, int bitness);
|
||||
|
||||
int astc_codec_store_image(const astc_codec_image * img, const char *filename, int bitness, const char **format_string);
|
||||
|
||||
int get_output_filename_enforced_bitness(const char *filename);
|
||||
|
||||
|
||||
// compute a bunch of error metrics
|
||||
void compute_error_metrics(int input_image_is_hdr, int input_components, const astc_codec_image * img1, const astc_codec_image * img2, int low_fstop, int high_fstop, int psnrmode);
|
||||
|
||||
// fetch an image-block from the input file
|
||||
void fetch_imageblock(const astc_codec_image * img, imageblock * pb, // picture-block to initialize with image data
|
||||
// block dimensions
|
||||
int xdim, int ydim, int zdim,
|
||||
// position in picture to fetch block from
|
||||
int xpos, int ypos, int zpos, swizzlepattern swz);
|
||||
|
||||
|
||||
// write an image block to the output file buffer.
|
||||
// the data written are taken from orig_data.
|
||||
void write_imageblock(astc_codec_image * img, const imageblock * pb, // picture-block to initialize with image data
|
||||
// block dimensions
|
||||
int xdim, int ydim, int zdim,
|
||||
// position in picture to write block to.
|
||||
int xpos, int ypos, int zpos, swizzlepattern swz);
|
||||
|
||||
|
||||
// helper function to check whether a given picture-block has alpha that is not
|
||||
// just uniformly 1.
|
||||
int imageblock_uses_alpha(int xdim, int ydim, int zdim, const imageblock * pb);
|
||||
|
||||
|
||||
float compute_imageblock_difference(int xdim, int ydim, int zdim, const imageblock * p1, const imageblock * p2, const error_weight_block * ewb);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ***********************************************************
|
||||
// functions pertaining to computing texel weights for a block
|
||||
// ***********************************************************
|
||||
|
||||
|
||||
struct endpoints
|
||||
{
|
||||
int partition_count;
|
||||
float4 endpt0[4];
|
||||
float4 endpt1[4];
|
||||
};
|
||||
|
||||
|
||||
struct endpoints_and_weights
|
||||
{
|
||||
endpoints ep;
|
||||
float weights[MAX_TEXELS_PER_BLOCK];
|
||||
float weight_error_scale[MAX_TEXELS_PER_BLOCK];
|
||||
};
|
||||
|
||||
|
||||
void compute_endpoints_and_ideal_weights_1_plane(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, endpoints_and_weights * ei);
|
||||
|
||||
void compute_endpoints_and_ideal_weights_2_planes(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, int separate_component,
|
||||
endpoints_and_weights * ei1, // for the three components of the primary plane of weights
|
||||
endpoints_and_weights * ei2 // for the remaining component.
|
||||
);
|
||||
|
||||
void compute_ideal_weights_for_decimation_table(const endpoints_and_weights * eai, const decimation_table * it, float *weight_set, float *weights);
|
||||
|
||||
void compute_ideal_quantized_weights_for_decimation_table(const endpoints_and_weights * eai,
|
||||
const decimation_table * it,
|
||||
float low_bound, float high_bound, const float *weight_set_in, float *weight_set_out, uint8_t * quantized_weight_set, int quantization_level);
|
||||
|
||||
|
||||
float compute_error_of_weight_set(const endpoints_and_weights * eai, const decimation_table * it, const float *weights);
|
||||
|
||||
|
||||
float compute_value_of_texel_flt(int texel_to_get, const decimation_table * it, const float *weights);
|
||||
|
||||
|
||||
int compute_value_of_texel_int(int texel_to_get, const decimation_table * it, const int *weights);
|
||||
|
||||
|
||||
void merge_endpoints(const endpoints * ep1, // contains three of the color components
|
||||
const endpoints * ep2, // contains the remaining color component
|
||||
int separate_component, endpoints * res);
|
||||
|
||||
// functions dealing with color endpoints
|
||||
|
||||
// function to pack a pair of color endpoints into a series of integers.
|
||||
// the format used may or may not match the format specified;
|
||||
// the return value is the format actually used.
|
||||
int pack_color_endpoints(astc_decode_mode decode_mode, float4 color0, float4 color1, float4 rgbs_color, float4 rgbo_color, float2 luminances, int format, int *output, int quantization_level);
|
||||
|
||||
|
||||
// unpack a pair of color endpoints from a series of integers.
|
||||
void unpack_color_endpoints(astc_decode_mode decode_mode, int format, int quantization_level, const int *input, int *rgb_hdr, int *alpha_hdr, int *nan_endpoint, ushort4 * output0, ushort4 * output1);
|
||||
|
||||
|
||||
// Errors associated with the cheaper endpoint encodings, plus two flags.
struct encoding_choice_errors
{
	// error of using LDR RGB-scale instead of complete endpoints
	float rgb_scale_error;

	// error of using HDR RGB-scale instead of complete endpoints
	float rgb_luma_error;

	// error of using luminance instead of RGB
	float luminance_error;

	// error of discarding alpha
	float alpha_drop_error;

	// error of discarding RGB
	float rgb_drop_error;

	// NOTE(review): presumably flags for whether offset encoding / blue
	// contraction are usable -- confirm in compute_encoding_choice_errors().
	int can_offset_encode;
	int can_blue_contract;
};
|
||||
|
||||
// buffers used to store intermediate data in compress_symbolic_block_fixed_partition_*()
|
||||
struct compress_fixed_partition_buffers
|
||||
{
|
||||
endpoints_and_weights* ei1;
|
||||
endpoints_and_weights* ei2;
|
||||
endpoints_and_weights* eix1;
|
||||
endpoints_and_weights* eix2;
|
||||
float *decimated_quantized_weights;
|
||||
float *decimated_weights;
|
||||
float *flt_quantized_decimated_quantized_weights;
|
||||
uint8_t *u8_quantized_decimated_quantized_weights;
|
||||
};
|
||||
|
||||
struct compress_symbolic_block_buffers
|
||||
{
|
||||
error_weight_block *ewb;
|
||||
error_weight_block_orig *ewbo;
|
||||
symbolic_compressed_block *tempblocks;
|
||||
imageblock *temp;
|
||||
compress_fixed_partition_buffers *plane1;
|
||||
compress_fixed_partition_buffers *planes2;
|
||||
};
|
||||
|
||||
void compute_encoding_choice_errors(int xdim, int ydim, int zdim, const imageblock * pb, const partition_info * pi, const error_weight_block * ewb,
|
||||
int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode
|
||||
encoding_choice_errors * eci);
|
||||
|
||||
|
||||
|
||||
void determine_optimal_set_of_endpoint_formats_to_use(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, const endpoints * ep,
|
||||
int separate_component, // separate color component for 2-plane mode; -1 for single-plane mode
|
||||
// bitcounts and errors computed for the various quantization methods
|
||||
const int *qwt_bitcounts, const float *qwt_errors,
|
||||
// output data
|
||||
int partition_format_specifiers[4][4], int quantized_weight[4], int quantization_level[4], int quantization_level_mod[4]);
|
||||
|
||||
|
||||
void recompute_ideal_colors(int xdim, int ydim, int zdim, int weight_quantization_mode, endpoints * ep, // contains the endpoints we wish to update
|
||||
float4 * rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6
|
||||
float4 * rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7
|
||||
float2 * lum_vectors, // used to return luminance-vectors.
|
||||
const uint8_t * weight_set, // the current set of weight values
|
||||
const uint8_t * plane2_weight_set, // NULL if plane 2 is not actually used.
|
||||
int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present
|
||||
const partition_info * pi, const decimation_table * it, const imageblock * pb, // picture-block containing the actual data.
|
||||
const error_weight_block * ewb);
|
||||
|
||||
|
||||
|
||||
void expand_block_artifact_suppression(int xdim, int ydim, int zdim, error_weighting_params * ewp);
|
||||
|
||||
// Function to set error weights for each color component for each texel in a block.
|
||||
// Returns the sum of all the error values set.
|
||||
float prepare_error_weight_block(const astc_codec_image * input_image,
|
||||
// dimensions of error weight block.
|
||||
int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, error_weight_block * ewb, error_weight_block_orig * ewbo);
|
||||
|
||||
|
||||
// functions pertaining to weight alignment
|
||||
void prepare_angular_tables(void);
|
||||
|
||||
void compute_angular_endpoints_1plane(float mode_cutoff,
|
||||
const block_size_descriptor * bsd,
|
||||
const float *decimated_quantized_weights, const float *decimated_weights, float low_value[MAX_WEIGHT_MODES], float high_value[MAX_WEIGHT_MODES]);
|
||||
|
||||
void compute_angular_endpoints_2planes(float mode_cutoff,
|
||||
const block_size_descriptor * bsd,
|
||||
const float *decimated_quantized_weights,
|
||||
const float *decimated_weights,
|
||||
float low_value1[MAX_WEIGHT_MODES], float high_value1[MAX_WEIGHT_MODES], float low_value2[MAX_WEIGHT_MODES], float high_value2[MAX_WEIGHT_MODES]);
|
||||
|
||||
|
||||
|
||||
|
||||
/* *********************************** high-level encode and decode functions ************************************ */
|
||||
|
||||
float compress_symbolic_block(const astc_codec_image * input_image,
|
||||
astc_decode_mode decode_mode, int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, symbolic_compressed_block * scb,
|
||||
compress_symbolic_block_buffers * tmpbuf);
|
||||
|
||||
|
||||
float4 lerp_color_flt(const float4 color0, const float4 color1, float weight, // 0..1
|
||||
float plane2_weight, // 0..1
|
||||
int plane2_color_component // 0..3; -1 if only one plane of weights is present.
|
||||
);
|
||||
|
||||
|
||||
ushort4 lerp_color_int(astc_decode_mode decode_mode, ushort4 color0, ushort4 color1, int weight, // 0..64
|
||||
int plane2_weight, // 0..64
|
||||
int plane2_color_component // 0..3; -1 if only one plane of weights is present.
|
||||
);
|
||||
|
||||
|
||||
void decompress_symbolic_block(astc_decode_mode decode_mode,
|
||||
// dimensions of block
|
||||
int xdim, int ydim, int zdim,
|
||||
// position of block
|
||||
int xpos, int ypos, int zpos, const symbolic_compressed_block * scb, imageblock * blk);
|
||||
|
||||
|
||||
physical_compressed_block symbolic_to_physical(int xdim, int ydim, int zdim, const symbolic_compressed_block * sc);
|
||||
|
||||
void physical_to_symbolic(int xdim, int ydim, int zdim, physical_compressed_block pb, symbolic_compressed_block * res);
|
||||
|
||||
|
||||
uint16_t unorm16_to_sf16(uint16_t p);
|
||||
uint16_t lns_to_sf16(uint16_t p);
|
||||
|
||||
|
||||
#endif
|
||||
2096
3rdparty/astc/astc_color_quantize.cpp
vendored
Normal file
2096
3rdparty/astc/astc_color_quantize.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
970
3rdparty/astc/astc_color_unquantize.cpp
vendored
Normal file
970
3rdparty/astc/astc_color_unquantize.cpp
vendored
Normal file
@@ -0,0 +1,970 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Color unquantization functions for ASTC.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include "mathlib.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
int rgb_delta_unpack(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
// unquantize the color endpoints
|
||||
int r0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int g0 = color_unquantization_tables[quantization_level][input[2]];
|
||||
int b0 = color_unquantization_tables[quantization_level][input[4]];
|
||||
|
||||
int r1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
int g1 = color_unquantization_tables[quantization_level][input[3]];
|
||||
int b1 = color_unquantization_tables[quantization_level][input[5]];
|
||||
|
||||
// perform the bit-transfer procedure
|
||||
r0 |= (r1 & 0x80) << 1;
|
||||
g0 |= (g1 & 0x80) << 1;
|
||||
b0 |= (b1 & 0x80) << 1;
|
||||
r1 &= 0x7F;
|
||||
g1 &= 0x7F;
|
||||
b1 &= 0x7F;
|
||||
if (r1 & 0x40)
|
||||
r1 -= 0x80;
|
||||
if (g1 & 0x40)
|
||||
g1 -= 0x80;
|
||||
if (b1 & 0x40)
|
||||
b1 -= 0x80;
|
||||
|
||||
r0 >>= 1;
|
||||
g0 >>= 1;
|
||||
b0 >>= 1;
|
||||
r1 >>= 1;
|
||||
g1 >>= 1;
|
||||
b1 >>= 1;
|
||||
|
||||
int rgbsum = r1 + g1 + b1;
|
||||
|
||||
r1 += r0;
|
||||
g1 += g0;
|
||||
b1 += b0;
|
||||
|
||||
|
||||
int retval;
|
||||
|
||||
int r0e, g0e, b0e;
|
||||
int r1e, g1e, b1e;
|
||||
|
||||
if (rgbsum >= 0)
|
||||
{
|
||||
r0e = r0;
|
||||
g0e = g0;
|
||||
b0e = b0;
|
||||
|
||||
r1e = r1;
|
||||
g1e = g1;
|
||||
b1e = b1;
|
||||
|
||||
retval = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
r0e = (r1 + b1) >> 1;
|
||||
g0e = (g1 + b1) >> 1;
|
||||
b0e = b1;
|
||||
|
||||
r1e = (r0 + b0) >> 1;
|
||||
g1e = (g0 + b0) >> 1;
|
||||
b1e = b0;
|
||||
|
||||
retval = 1;
|
||||
}
|
||||
|
||||
if (r0e < 0)
|
||||
r0e = 0;
|
||||
else if (r0e > 255)
|
||||
r0e = 255;
|
||||
|
||||
if (g0e < 0)
|
||||
g0e = 0;
|
||||
else if (g0e > 255)
|
||||
g0e = 255;
|
||||
|
||||
if (b0e < 0)
|
||||
b0e = 0;
|
||||
else if (b0e > 255)
|
||||
b0e = 255;
|
||||
|
||||
if (r1e < 0)
|
||||
r1e = 0;
|
||||
else if (r1e > 255)
|
||||
r1e = 255;
|
||||
|
||||
if (g1e < 0)
|
||||
g1e = 0;
|
||||
else if (g1e > 255)
|
||||
g1e = 255;
|
||||
|
||||
if (b1e < 0)
|
||||
b1e = 0;
|
||||
else if (b1e > 255)
|
||||
b1e = 255;
|
||||
|
||||
output0->x = r0e;
|
||||
output0->y = g0e;
|
||||
output0->z = b0e;
|
||||
output0->w = 0xFF;
|
||||
|
||||
output1->x = r1e;
|
||||
output1->y = g1e;
|
||||
output1->z = b1e;
|
||||
output1->w = 0xFF;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
int rgb_unpack(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
|
||||
int ri0b = color_unquantization_tables[quantization_level][input[0]];
|
||||
int ri1b = color_unquantization_tables[quantization_level][input[1]];
|
||||
int gi0b = color_unquantization_tables[quantization_level][input[2]];
|
||||
int gi1b = color_unquantization_tables[quantization_level][input[3]];
|
||||
int bi0b = color_unquantization_tables[quantization_level][input[4]];
|
||||
int bi1b = color_unquantization_tables[quantization_level][input[5]];
|
||||
|
||||
if (ri0b + gi0b + bi0b > ri1b + gi1b + bi1b)
|
||||
{
|
||||
// blue-contraction
|
||||
ri0b = (ri0b + bi0b) >> 1;
|
||||
gi0b = (gi0b + bi0b) >> 1;
|
||||
ri1b = (ri1b + bi1b) >> 1;
|
||||
gi1b = (gi1b + bi1b) >> 1;
|
||||
|
||||
output0->x = ri1b;
|
||||
output0->y = gi1b;
|
||||
output0->z = bi1b;
|
||||
output0->w = 255;
|
||||
|
||||
output1->x = ri0b;
|
||||
output1->y = gi0b;
|
||||
output1->z = bi0b;
|
||||
output1->w = 255;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
output0->x = ri0b;
|
||||
output0->y = gi0b;
|
||||
output0->z = bi0b;
|
||||
output0->w = 255;
|
||||
|
||||
output1->x = ri1b;
|
||||
output1->y = gi1b;
|
||||
output1->z = bi1b;
|
||||
output1->w = 255;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void rgba_unpack(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int order = rgb_unpack(input, quantization_level, output0, output1);
|
||||
if (order == 0)
|
||||
{
|
||||
output0->w = color_unquantization_tables[quantization_level][input[6]];
|
||||
output1->w = color_unquantization_tables[quantization_level][input[7]];
|
||||
}
|
||||
else
|
||||
{
|
||||
output0->w = color_unquantization_tables[quantization_level][input[7]];
|
||||
output1->w = color_unquantization_tables[quantization_level][input[6]];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void rgba_delta_unpack(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int a0 = color_unquantization_tables[quantization_level][input[6]];
|
||||
int a1 = color_unquantization_tables[quantization_level][input[7]];
|
||||
a0 |= (a1 & 0x80) << 1;
|
||||
a1 &= 0x7F;
|
||||
if (a1 & 0x40)
|
||||
a1 -= 0x80;
|
||||
a0 >>= 1;
|
||||
a1 >>= 1;
|
||||
a1 += a0;
|
||||
|
||||
if (a1 < 0)
|
||||
a1 = 0;
|
||||
else if (a1 > 255)
|
||||
a1 = 255;
|
||||
|
||||
int order = rgb_delta_unpack(input, quantization_level, output0, output1);
|
||||
if (order == 0)
|
||||
{
|
||||
output0->w = a0;
|
||||
output1->w = a1;
|
||||
}
|
||||
else
|
||||
{
|
||||
output0->w = a1;
|
||||
output1->w = a0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void rgb_scale_unpack(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int ir = color_unquantization_tables[quantization_level][input[0]];
|
||||
int ig = color_unquantization_tables[quantization_level][input[1]];
|
||||
int ib = color_unquantization_tables[quantization_level][input[2]];
|
||||
|
||||
int iscale = color_unquantization_tables[quantization_level][input[3]];
|
||||
|
||||
*output1 = ushort4(ir, ig, ib, 255);
|
||||
*output0 = ushort4((ir * iscale) >> 8, (ig * iscale) >> 8, (ib * iscale) >> 8, 255);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void rgb_scale_alpha_unpack(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
rgb_scale_unpack(input, quantization_level, output0, output1);
|
||||
output0->w = color_unquantization_tables[quantization_level][input[4]];
|
||||
output1->w = color_unquantization_tables[quantization_level][input[5]];
|
||||
|
||||
}
|
||||
|
||||
|
||||
void luminance_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int lum0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int lum1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
*output0 = ushort4(lum0, lum0, lum0, 255);
|
||||
*output1 = ushort4(lum1, lum1, lum1, 255);
|
||||
}
|
||||
|
||||
|
||||
void luminance_delta_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int v0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int v1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
int l0 = (v0 >> 2) | (v1 & 0xC0);
|
||||
int l1 = l0 + (v1 & 0x3F);
|
||||
|
||||
if (l1 > 255)
|
||||
l1 = 255;
|
||||
|
||||
*output0 = ushort4(l0, l0, l0, 255);
|
||||
*output1 = ushort4(l1, l1, l1, 255);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void luminance_alpha_unpack(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int lum0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int lum1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
int alpha0 = color_unquantization_tables[quantization_level][input[2]];
|
||||
int alpha1 = color_unquantization_tables[quantization_level][input[3]];
|
||||
*output0 = ushort4(lum0, lum0, lum0, alpha0);
|
||||
*output1 = ushort4(lum1, lum1, lum1, alpha1);
|
||||
}
|
||||
|
||||
|
||||
void luminance_alpha_delta_unpack(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int lum0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int lum1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
int alpha0 = color_unquantization_tables[quantization_level][input[2]];
|
||||
int alpha1 = color_unquantization_tables[quantization_level][input[3]];
|
||||
|
||||
lum0 |= (lum1 & 0x80) << 1;
|
||||
alpha0 |= (alpha1 & 0x80) << 1;
|
||||
lum1 &= 0x7F;
|
||||
alpha1 &= 0x7F;
|
||||
if (lum1 & 0x40)
|
||||
lum1 -= 0x80;
|
||||
if (alpha1 & 0x40)
|
||||
alpha1 -= 0x80;
|
||||
|
||||
lum0 >>= 1;
|
||||
lum1 >>= 1;
|
||||
alpha0 >>= 1;
|
||||
alpha1 >>= 1;
|
||||
lum1 += lum0;
|
||||
alpha1 += alpha0;
|
||||
|
||||
if (lum1 < 0)
|
||||
lum1 = 0;
|
||||
else if (lum1 > 255)
|
||||
lum1 = 255;
|
||||
|
||||
if (alpha1 < 0)
|
||||
alpha1 = 0;
|
||||
else if (alpha1 > 255)
|
||||
alpha1 = 255;
|
||||
|
||||
*output0 = ushort4(lum0, lum0, lum0, alpha0);
|
||||
*output1 = ushort4(lum1, lum1, lum1, alpha1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// RGB-offset format
|
||||
void hdr_rgbo_unpack3(const int input[4], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int v0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int v1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
int v2 = color_unquantization_tables[quantization_level][input[2]];
|
||||
int v3 = color_unquantization_tables[quantization_level][input[3]];
|
||||
|
||||
int modeval = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3);
|
||||
|
||||
int majcomp;
|
||||
int mode;
|
||||
if ((modeval & 0xC) != 0xC)
|
||||
{
|
||||
majcomp = modeval >> 2;
|
||||
mode = modeval & 3;
|
||||
}
|
||||
else if (modeval != 0xF)
|
||||
{
|
||||
majcomp = modeval & 3;
|
||||
mode = 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
majcomp = 0;
|
||||
mode = 5;
|
||||
}
|
||||
|
||||
int red = v0 & 0x3F;
|
||||
int green = v1 & 0x1F;
|
||||
int blue = v2 & 0x1F;
|
||||
int scale = v3 & 0x1F;
|
||||
|
||||
int bit0 = (v1 >> 6) & 1;
|
||||
int bit1 = (v1 >> 5) & 1;
|
||||
int bit2 = (v2 >> 6) & 1;
|
||||
int bit3 = (v2 >> 5) & 1;
|
||||
int bit4 = (v3 >> 7) & 1;
|
||||
int bit5 = (v3 >> 6) & 1;
|
||||
int bit6 = (v3 >> 5) & 1;
|
||||
|
||||
int ohcomp = 1 << mode;
|
||||
|
||||
if (ohcomp & 0x30)
|
||||
green |= bit0 << 6;
|
||||
if (ohcomp & 0x3A)
|
||||
green |= bit1 << 5;
|
||||
if (ohcomp & 0x30)
|
||||
blue |= bit2 << 6;
|
||||
if (ohcomp & 0x3A)
|
||||
blue |= bit3 << 5;
|
||||
|
||||
if (ohcomp & 0x3D)
|
||||
scale |= bit6 << 5;
|
||||
if (ohcomp & 0x2D)
|
||||
scale |= bit5 << 6;
|
||||
if (ohcomp & 0x04)
|
||||
scale |= bit4 << 7;
|
||||
|
||||
if (ohcomp & 0x3B)
|
||||
red |= bit4 << 6;
|
||||
if (ohcomp & 0x04)
|
||||
red |= bit3 << 6;
|
||||
|
||||
if (ohcomp & 0x10)
|
||||
red |= bit5 << 7;
|
||||
if (ohcomp & 0x0F)
|
||||
red |= bit2 << 7;
|
||||
|
||||
if (ohcomp & 0x05)
|
||||
red |= bit1 << 8;
|
||||
if (ohcomp & 0x0A)
|
||||
red |= bit0 << 8;
|
||||
|
||||
if (ohcomp & 0x05)
|
||||
red |= bit0 << 9;
|
||||
if (ohcomp & 0x02)
|
||||
red |= bit6 << 9;
|
||||
|
||||
if (ohcomp & 0x01)
|
||||
red |= bit3 << 10;
|
||||
if (ohcomp & 0x02)
|
||||
red |= bit5 << 10;
|
||||
|
||||
|
||||
// expand to 12 bits.
|
||||
static const int shamts[6] = { 1, 1, 2, 3, 4, 5 };
|
||||
int shamt = shamts[mode];
|
||||
red <<= shamt;
|
||||
green <<= shamt;
|
||||
blue <<= shamt;
|
||||
scale <<= shamt;
|
||||
|
||||
// on modes 0 to 4, the values stored for "green" and "blue" are differentials,
|
||||
// not absolute values.
|
||||
if (mode != 5)
|
||||
{
|
||||
green = red - green;
|
||||
blue = red - blue;
|
||||
}
|
||||
|
||||
// switch around components.
|
||||
int temp;
|
||||
switch (majcomp)
|
||||
{
|
||||
case 1:
|
||||
temp = red;
|
||||
red = green;
|
||||
green = temp;
|
||||
break;
|
||||
case 2:
|
||||
temp = red;
|
||||
red = blue;
|
||||
blue = temp;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
int red0 = red - scale;
|
||||
int green0 = green - scale;
|
||||
int blue0 = blue - scale;
|
||||
|
||||
// clamp to [0,0xFFF].
|
||||
if (red < 0)
|
||||
red = 0;
|
||||
if (green < 0)
|
||||
green = 0;
|
||||
if (blue < 0)
|
||||
blue = 0;
|
||||
|
||||
if (red0 < 0)
|
||||
red0 = 0;
|
||||
if (green0 < 0)
|
||||
green0 = 0;
|
||||
if (blue0 < 0)
|
||||
blue0 = 0;
|
||||
|
||||
*output0 = ushort4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
|
||||
*output1 = ushort4(red << 4, green << 4, blue << 4, 0x7800);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void hdr_rgb_unpack3(const int input[6], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
|
||||
int v0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int v1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
int v2 = color_unquantization_tables[quantization_level][input[2]];
|
||||
int v3 = color_unquantization_tables[quantization_level][input[3]];
|
||||
int v4 = color_unquantization_tables[quantization_level][input[4]];
|
||||
int v5 = color_unquantization_tables[quantization_level][input[5]];
|
||||
|
||||
// extract all the fixed-placement bitfields
|
||||
int modeval = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2);
|
||||
|
||||
int majcomp = ((v4 & 0x80) >> 7) | (((v5 & 0x80) >> 7) << 1);
|
||||
|
||||
if (majcomp == 3)
|
||||
{
|
||||
*output0 = ushort4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800);
|
||||
*output1 = ushort4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800);
|
||||
return;
|
||||
}
|
||||
|
||||
int a = v0 | ((v1 & 0x40) << 2);
|
||||
int b0 = v2 & 0x3f;
|
||||
int b1 = v3 & 0x3f;
|
||||
int c = v1 & 0x3f;
|
||||
int d0 = v4 & 0x7f;
|
||||
int d1 = v5 & 0x7f;
|
||||
|
||||
// get hold of the number of bits in 'd0' and 'd1'
|
||||
static const int dbits_tab[8] = { 7, 6, 7, 6, 5, 6, 5, 6 };
|
||||
int dbits = dbits_tab[modeval];
|
||||
|
||||
// extract six variable-placement bits
|
||||
int bit0 = (v2 >> 6) & 1;
|
||||
int bit1 = (v3 >> 6) & 1;
|
||||
|
||||
int bit2 = (v4 >> 6) & 1;
|
||||
int bit3 = (v5 >> 6) & 1;
|
||||
int bit4 = (v4 >> 5) & 1;
|
||||
int bit5 = (v5 >> 5) & 1;
|
||||
|
||||
|
||||
// and prepend the variable-placement bits depending on mode.
|
||||
int ohmod = 1 << modeval; // one-hot-mode
|
||||
if (ohmod & 0xA4)
|
||||
a |= bit0 << 9;
|
||||
if (ohmod & 0x8)
|
||||
a |= bit2 << 9;
|
||||
if (ohmod & 0x50)
|
||||
a |= bit4 << 9;
|
||||
|
||||
if (ohmod & 0x50)
|
||||
a |= bit5 << 10;
|
||||
if (ohmod & 0xA0)
|
||||
a |= bit1 << 10;
|
||||
|
||||
if (ohmod & 0xC0)
|
||||
a |= bit2 << 11;
|
||||
|
||||
if (ohmod & 0x4)
|
||||
c |= bit1 << 6;
|
||||
if (ohmod & 0xE8)
|
||||
c |= bit3 << 6;
|
||||
|
||||
if (ohmod & 0x20)
|
||||
c |= bit2 << 7;
|
||||
|
||||
|
||||
if (ohmod & 0x5B)
|
||||
b0 |= bit0 << 6;
|
||||
if (ohmod & 0x5B)
|
||||
b1 |= bit1 << 6;
|
||||
|
||||
if (ohmod & 0x12)
|
||||
b0 |= bit2 << 7;
|
||||
if (ohmod & 0x12)
|
||||
b1 |= bit3 << 7;
|
||||
|
||||
if (ohmod & 0xAF)
|
||||
d0 |= bit4 << 5;
|
||||
if (ohmod & 0xAF)
|
||||
d1 |= bit5 << 5;
|
||||
if (ohmod & 0x5)
|
||||
d0 |= bit2 << 6;
|
||||
if (ohmod & 0x5)
|
||||
d1 |= bit3 << 6;
|
||||
|
||||
// sign-extend 'd0' and 'd1'
|
||||
// note: this code assumes that signed right-shift actually sign-fills, not zero-fills.
|
||||
int32_t d0x = d0;
|
||||
int32_t d1x = d1;
|
||||
int sx_shamt = 32 - dbits;
|
||||
d0x <<= sx_shamt;
|
||||
d0x >>= sx_shamt;
|
||||
d1x <<= sx_shamt;
|
||||
d1x >>= sx_shamt;
|
||||
d0 = d0x;
|
||||
d1 = d1x;
|
||||
|
||||
// expand all values to 12 bits, with left-shift as needed.
|
||||
int val_shamt = (modeval >> 1) ^ 3;
|
||||
a <<= val_shamt;
|
||||
b0 <<= val_shamt;
|
||||
b1 <<= val_shamt;
|
||||
c <<= val_shamt;
|
||||
d0 <<= val_shamt;
|
||||
d1 <<= val_shamt;
|
||||
|
||||
// then compute the actual color values.
|
||||
int red1 = a;
|
||||
int green1 = a - b0;
|
||||
int blue1 = a - b1;
|
||||
int red0 = a - c;
|
||||
int green0 = a - b0 - c - d0;
|
||||
int blue0 = a - b1 - c - d1;
|
||||
|
||||
// clamp the color components to [0,2^12 - 1]
|
||||
if (red0 < 0)
|
||||
red0 = 0;
|
||||
else if (red0 > 0xFFF)
|
||||
red0 = 0xFFF;
|
||||
|
||||
if (green0 < 0)
|
||||
green0 = 0;
|
||||
else if (green0 > 0xFFF)
|
||||
green0 = 0xFFF;
|
||||
|
||||
if (blue0 < 0)
|
||||
blue0 = 0;
|
||||
else if (blue0 > 0xFFF)
|
||||
blue0 = 0xFFF;
|
||||
|
||||
if (red1 < 0)
|
||||
red1 = 0;
|
||||
else if (red1 > 0xFFF)
|
||||
red1 = 0xFFF;
|
||||
|
||||
if (green1 < 0)
|
||||
green1 = 0;
|
||||
else if (green1 > 0xFFF)
|
||||
green1 = 0xFFF;
|
||||
|
||||
if (blue1 < 0)
|
||||
blue1 = 0;
|
||||
else if (blue1 > 0xFFF)
|
||||
blue1 = 0xFFF;
|
||||
|
||||
|
||||
// switch around the color components
|
||||
int temp0, temp1;
|
||||
switch (majcomp)
|
||||
{
|
||||
case 1: // switch around red and green
|
||||
temp0 = red0;
|
||||
temp1 = red1;
|
||||
red0 = green0;
|
||||
red1 = green1;
|
||||
green0 = temp0;
|
||||
green1 = temp1;
|
||||
break;
|
||||
case 2: // switch around red and blue
|
||||
temp0 = red0;
|
||||
temp1 = red1;
|
||||
red0 = blue0;
|
||||
red1 = blue1;
|
||||
blue0 = temp0;
|
||||
blue1 = temp1;
|
||||
break;
|
||||
case 0: // no switch
|
||||
break;
|
||||
}
|
||||
|
||||
*output0 = ushort4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
|
||||
*output1 = ushort4(red1 << 4, green1 << 4, blue1 << 4, 0x7800);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void hdr_rgb_ldr_alpha_unpack3(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
hdr_rgb_unpack3(input, quantization_level, output0, output1);
|
||||
|
||||
int v6 = color_unquantization_tables[quantization_level][input[6]];
|
||||
int v7 = color_unquantization_tables[quantization_level][input[7]];
|
||||
output0->w = v6;
|
||||
output1->w = v7;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void hdr_luminance_small_range_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int v0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int v1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
|
||||
int y0, y1;
|
||||
if (v0 & 0x80)
|
||||
{
|
||||
y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
|
||||
y1 = (v1 & 0x1F) << 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
|
||||
y1 = (v1 & 0xF) << 1;
|
||||
}
|
||||
|
||||
y1 += y0;
|
||||
if (y1 > 0xFFF)
|
||||
y1 = 0xFFF;
|
||||
|
||||
*output0 = ushort4(y0 << 4, y0 << 4, y0 << 4, 0x7800);
|
||||
*output1 = ushort4(y1 << 4, y1 << 4, y1 << 4, 0x7800);
|
||||
}
|
||||
|
||||
|
||||
void hdr_luminance_large_range_unpack(const int input[2], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
int v0 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int v1 = color_unquantization_tables[quantization_level][input[1]];
|
||||
|
||||
int y0, y1;
|
||||
if (v1 >= v0)
|
||||
{
|
||||
y0 = v0 << 4;
|
||||
y1 = v1 << 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (v1 << 4) + 8;
|
||||
y1 = (v0 << 4) - 8;
|
||||
}
|
||||
*output0 = ushort4(y0 << 4, y0 << 4, y0 << 4, 0x7800);
|
||||
*output1 = ushort4(y1 << 4, y1 << 4, y1 << 4, 0x7800);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void hdr_alpha_unpack(const int input[2], int quantization_level, int *a0, int *a1)
|
||||
{
|
||||
|
||||
int v6 = color_unquantization_tables[quantization_level][input[0]];
|
||||
int v7 = color_unquantization_tables[quantization_level][input[1]];
|
||||
|
||||
int selector = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
|
||||
v6 &= 0x7F;
|
||||
v7 &= 0x7F;
|
||||
if (selector == 3)
|
||||
{
|
||||
*a0 = v6 << 5;
|
||||
*a1 = v7 << 5;
|
||||
}
|
||||
else
|
||||
{
|
||||
v6 |= (v7 << (selector + 1)) & 0x780;
|
||||
v7 &= (0x3f >> selector);
|
||||
v7 ^= 32 >> selector;
|
||||
v7 -= 32 >> selector;
|
||||
v6 <<= (4 - selector);
|
||||
v7 <<= (4 - selector);
|
||||
v7 += v6;
|
||||
|
||||
if (v7 < 0)
|
||||
v7 = 0;
|
||||
else if (v7 > 0xFFF)
|
||||
v7 = 0xFFF;
|
||||
|
||||
*a0 = v6;
|
||||
*a1 = v7;
|
||||
}
|
||||
|
||||
*a0 <<= 4;
|
||||
*a1 <<= 4;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void hdr_rgb_hdr_alpha_unpack3(const int input[8], int quantization_level, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
hdr_rgb_unpack3(input, quantization_level, output0, output1);
|
||||
|
||||
int alpha0, alpha1;
|
||||
hdr_alpha_unpack(input + 6, quantization_level, &alpha0, &alpha1);
|
||||
|
||||
output0->w = alpha0;
|
||||
output1->w = alpha1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void unpack_color_endpoints(astc_decode_mode decode_mode, int format, int quantization_level, const int *input, int *rgb_hdr, int *alpha_hdr, int *nan_endpoint, ushort4 * output0, ushort4 * output1)
|
||||
{
|
||||
*nan_endpoint = 0;
|
||||
|
||||
switch (format)
|
||||
{
|
||||
case FMT_LUMINANCE:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
luminance_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_LUMINANCE_DELTA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
luminance_delta_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_HDR_LUMINANCE_SMALL_RANGE:
|
||||
*rgb_hdr = 1;
|
||||
*alpha_hdr = -1;
|
||||
hdr_luminance_small_range_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_HDR_LUMINANCE_LARGE_RANGE:
|
||||
*rgb_hdr = 1;
|
||||
*alpha_hdr = -1;
|
||||
hdr_luminance_large_range_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_LUMINANCE_ALPHA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
luminance_alpha_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_LUMINANCE_ALPHA_DELTA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
luminance_alpha_delta_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_RGB_SCALE:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
rgb_scale_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_RGB_SCALE_ALPHA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
rgb_scale_alpha_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_HDR_RGB_SCALE:
|
||||
*rgb_hdr = 1;
|
||||
*alpha_hdr = -1;
|
||||
hdr_rgbo_unpack3(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_RGB:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
rgb_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_RGB_DELTA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
rgb_delta_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_HDR_RGB:
|
||||
*rgb_hdr = 1;
|
||||
*alpha_hdr = -1;
|
||||
hdr_rgb_unpack3(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_RGBA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
rgba_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_RGBA_DELTA:
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
rgba_delta_unpack(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_HDR_RGB_LDR_ALPHA:
|
||||
*rgb_hdr = 1;
|
||||
*alpha_hdr = 0;
|
||||
hdr_rgb_ldr_alpha_unpack3(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
case FMT_HDR_RGBA:
|
||||
*rgb_hdr = 1;
|
||||
*alpha_hdr = 1;
|
||||
hdr_rgb_hdr_alpha_unpack3(input, quantization_level, output0, output1);
|
||||
break;
|
||||
|
||||
default:
|
||||
ASTC_CODEC_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (*alpha_hdr == -1)
|
||||
{
|
||||
if (alpha_force_use_of_hdr)
|
||||
{
|
||||
output0->w = 0x7800;
|
||||
output1->w = 0x7800;
|
||||
*alpha_hdr = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
output0->w = 0x00FF;
|
||||
output1->w = 0x00FF;
|
||||
*alpha_hdr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
switch (decode_mode)
|
||||
{
|
||||
case DECODE_LDR_SRGB:
|
||||
if (*rgb_hdr == 1)
|
||||
{
|
||||
output0->x = 0xFF00;
|
||||
output0->y = 0x0000;
|
||||
output0->z = 0xFF00;
|
||||
output0->w = 0xFF00;
|
||||
output1->x = 0xFF00;
|
||||
output1->y = 0x0000;
|
||||
output1->z = 0xFF00;
|
||||
output1->w = 0xFF00;
|
||||
}
|
||||
else
|
||||
{
|
||||
output0->x *= 257;
|
||||
output0->y *= 257;
|
||||
output0->z *= 257;
|
||||
output0->w *= 257;
|
||||
output1->x *= 257;
|
||||
output1->y *= 257;
|
||||
output1->z *= 257;
|
||||
output1->w *= 257;
|
||||
}
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
break;
|
||||
|
||||
case DECODE_LDR:
|
||||
if (*rgb_hdr == 1)
|
||||
{
|
||||
output0->x = 0xFFFF;
|
||||
output0->y = 0xFFFF;
|
||||
output0->z = 0xFFFF;
|
||||
output0->w = 0xFFFF;
|
||||
output1->x = 0xFFFF;
|
||||
output1->y = 0xFFFF;
|
||||
output1->z = 0xFFFF;
|
||||
output1->w = 0xFFFF;
|
||||
*nan_endpoint = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
output0->x *= 257;
|
||||
output0->y *= 257;
|
||||
output0->z *= 257;
|
||||
output0->w *= 257;
|
||||
output1->x *= 257;
|
||||
output1->y *= 257;
|
||||
output1->z *= 257;
|
||||
output1->w *= 257;
|
||||
}
|
||||
*rgb_hdr = 0;
|
||||
*alpha_hdr = 0;
|
||||
break;
|
||||
|
||||
case DECODE_HDR:
|
||||
|
||||
if (*rgb_hdr == 0)
|
||||
{
|
||||
output0->x *= 257;
|
||||
output0->y *= 257;
|
||||
output0->z *= 257;
|
||||
output1->x *= 257;
|
||||
output1->y *= 257;
|
||||
output1->z *= 257;
|
||||
}
|
||||
if (*alpha_hdr == 0)
|
||||
{
|
||||
output0->w *= 257;
|
||||
output1->w *= 257;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
1792
3rdparty/astc/astc_compress_symbolic.cpp
vendored
Normal file
1792
3rdparty/astc/astc_compress_symbolic.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
524
3rdparty/astc/astc_compute_variance.cpp
vendored
Normal file
524
3rdparty/astc/astc_compute_variance.cpp
vendored
Normal file
@@ -0,0 +1,524 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief ASTC functions to calculate, for each pixel and each color component,
|
||||
* its variance within an NxN footprint; we want N to be parametric.
|
||||
*
|
||||
* The routine below uses summed area tables in order to perform the
|
||||
* computation in O(1) time per pixel, independent of how big N is.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include <math.h>
|
||||
#include "mathlib.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
// Per-texel results of the average/variance pass, indexed as [z][y][x].
// compute_pixel_region_variance() stores the RGBA averages in input_averages,
// the alpha-only averages (computed with the alpha kernel radius) in
// input_alpha_averages, and the RGBA variances in input_variances.
float4 *** input_averages;
|
||||
float *** input_alpha_averages;
|
||||
float4 *** input_variances;
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// routine to compute averages and variances for a pixel region.
|
||||
// The routine computes both in a single pass, using a summed-area table
|
||||
// to decouple the running time from the averaging/variance kernel size.
|
||||
|
||||
static void compute_pixel_region_variance(const astc_codec_image * img, float rgb_power_to_use, float alpha_power_to_use, swizzlepattern swz, int use_z_axis,
|
||||
int source_xoffset,int source_yoffset, int source_zoffset, // position of upper-left pixel in data set
|
||||
int xsize, int ysize, int zsize, // the size of the region to actually compute averages and variances for.
|
||||
int avg_var_kernel_radius, int alpha_kernel_radius,
|
||||
int dest_xoffset, int dest_yoffset, int dest_zoffset)
|
||||
{
|
||||
int x, y, z;
|
||||
|
||||
int kernel_radius = MAX(avg_var_kernel_radius, alpha_kernel_radius);
|
||||
int kerneldim = 2 * kernel_radius + 1;
|
||||
|
||||
// allocate memory
|
||||
int xpadsize = xsize + kerneldim;
|
||||
int ypadsize = ysize + kerneldim;
|
||||
int zpadsize = zsize + (use_z_axis ? kerneldim : 1);
|
||||
|
||||
double4 ***varbuf1 = new double4 **[zpadsize];
|
||||
double4 ***varbuf2 = new double4 **[zpadsize];
|
||||
varbuf1[0] = new double4 *[ypadsize * zpadsize];
|
||||
varbuf2[0] = new double4 *[ypadsize * zpadsize];
|
||||
varbuf1[0][0] = new double4[xpadsize * ypadsize * zpadsize];
|
||||
varbuf2[0][0] = new double4[xpadsize * ypadsize * zpadsize];
|
||||
|
||||
|
||||
for (z = 1; z < zpadsize; z++)
|
||||
{
|
||||
varbuf1[z] = varbuf1[0] + ypadsize * z;
|
||||
varbuf2[z] = varbuf2[0] + ypadsize * z;
|
||||
varbuf1[z][0] = varbuf1[0][0] + xpadsize * ypadsize * z;
|
||||
varbuf2[z][0] = varbuf2[0][0] + xpadsize * ypadsize * z;
|
||||
}
|
||||
|
||||
for (z = 0; z < zpadsize; z++)
|
||||
for (y = 1; y < ypadsize; y++)
|
||||
{
|
||||
varbuf1[z][y] = varbuf1[z][0] + xpadsize * y;
|
||||
varbuf2[z][y] = varbuf2[z][0] + xpadsize * y;
|
||||
}
|
||||
|
||||
int powers_are_1 = (rgb_power_to_use == 1.0f) && (alpha_power_to_use == 1.0f);
|
||||
|
||||
|
||||
// load x and x^2 values into the allocated buffers
|
||||
if (img->imagedata8)
|
||||
{
|
||||
uint8_t data[6];
|
||||
data[4] = 0;
|
||||
data[5] = 255;
|
||||
|
||||
for (z = 0; z < zpadsize - 1; z++)
|
||||
{
|
||||
int z_src = z + source_zoffset - (use_z_axis ? kernel_radius : 0);
|
||||
for (y = 0; y < ypadsize - 1; y++)
|
||||
{
|
||||
int y_src = y + source_yoffset - kernel_radius;
|
||||
for (x = 0; x < xpadsize - 1; x++)
|
||||
{
|
||||
int x_src = x + source_xoffset - kernel_radius;
|
||||
data[0] = img->imagedata8[z_src][y_src][4 * x_src + 0];
|
||||
data[1] = img->imagedata8[z_src][y_src][4 * x_src + 1];
|
||||
data[2] = img->imagedata8[z_src][y_src][4 * x_src + 2];
|
||||
data[3] = img->imagedata8[z_src][y_src][4 * x_src + 3];
|
||||
|
||||
uint8_t r = data[swz.r];
|
||||
uint8_t g = data[swz.g];
|
||||
uint8_t b = data[swz.b];
|
||||
uint8_t a = data[swz.a];
|
||||
|
||||
double4 d = double4(r * (1.0 / 255.0),
|
||||
g * (1.0 / 255.0),
|
||||
b * (1.0 / 255.0),
|
||||
a * (1.0 / 255.0));
|
||||
|
||||
if (perform_srgb_transform)
|
||||
{
|
||||
d.x = (d.x <= 0.04045) ? d.x * (1.0 / 12.92) : (d.x <= 1) ? pow((d.x + 0.055) * (1.0 / 1.055), 2.4) : d.x;
|
||||
d.y = (d.y <= 0.04045) ? d.y * (1.0 / 12.92) : (d.y <= 1) ? pow((d.y + 0.055) * (1.0 / 1.055), 2.4) : d.y;
|
||||
d.z = (d.z <= 0.04045) ? d.z * (1.0 / 12.92) : (d.z <= 1) ? pow((d.z + 0.055) * (1.0 / 1.055), 2.4) : d.z;
|
||||
}
|
||||
|
||||
if (!powers_are_1)
|
||||
{
|
||||
d.x = pow(MAX(d.x, 1e-6), (double)rgb_power_to_use);
|
||||
d.y = pow(MAX(d.y, 1e-6), (double)rgb_power_to_use);
|
||||
d.z = pow(MAX(d.z, 1e-6), (double)rgb_power_to_use);
|
||||
d.w = pow(MAX(d.w, 1e-6), (double)alpha_power_to_use);
|
||||
}
|
||||
|
||||
varbuf1[z][y][x] = d;
|
||||
varbuf2[z][y][x] = d * d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
uint16_t data[6];
|
||||
data[4] = 0;
|
||||
data[5] = 0x3C00; // 1.0 encoded as FP16.
|
||||
|
||||
for (z = 0; z < zpadsize - 1; z++)
|
||||
{
|
||||
int z_src = z + source_zoffset - (use_z_axis ? kernel_radius : 0);
|
||||
for (y = 0; y < ypadsize - 1; y++)
|
||||
{
|
||||
int y_src = y + source_yoffset - kernel_radius;
|
||||
for (x = 0; x < xpadsize - 1; x++)
|
||||
{
|
||||
int x_src = x + source_xoffset - kernel_radius;
|
||||
data[0] = img->imagedata16[z_src][y_src][4 * x_src];
|
||||
data[1] = img->imagedata16[z_src][y_src][4 * x_src + 1];
|
||||
data[2] = img->imagedata16[z_src][y_src][4 * x_src + 2];
|
||||
data[3] = img->imagedata16[z_src][y_src][4 * x_src + 3];
|
||||
|
||||
uint16_t r = data[swz.r];
|
||||
uint16_t g = data[swz.g];
|
||||
uint16_t b = data[swz.b];
|
||||
uint16_t a = data[swz.a];
|
||||
|
||||
double4 d = double4(sf16_to_float(r),
|
||||
sf16_to_float(g),
|
||||
sf16_to_float(b),
|
||||
sf16_to_float(a));
|
||||
|
||||
if (perform_srgb_transform)
|
||||
{
|
||||
d.x = (d.x <= 0.04045) ? d.x * (1.0 / 12.92) : (d.x <= 1) ? pow((d.x + 0.055) * (1.0 / 1.055), 2.4) : d.x;
|
||||
d.y = (d.y <= 0.04045) ? d.y * (1.0 / 12.92) : (d.y <= 1) ? pow((d.y + 0.055) * (1.0 / 1.055), 2.4) : d.y;
|
||||
d.z = (d.z <= 0.04045) ? d.z * (1.0 / 12.92) : (d.z <= 1) ? pow((d.z + 0.055) * (1.0 / 1.055), 2.4) : d.z;
|
||||
}
|
||||
|
||||
if (!powers_are_1)
|
||||
{
|
||||
d.x = pow(MAX(d.x, 1e-6), (double)rgb_power_to_use);
|
||||
d.y = pow(MAX(d.y, 1e-6), (double)rgb_power_to_use);
|
||||
d.z = pow(MAX(d.z, 1e-6), (double)rgb_power_to_use);
|
||||
d.w = pow(MAX(d.w, 1e-6), (double)alpha_power_to_use);
|
||||
}
|
||||
|
||||
varbuf1[z][y][x] = d;
|
||||
varbuf2[z][y][x] = d * d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// pad out buffers with 0s
|
||||
for (z = 0; z < zpadsize; z++)
|
||||
{
|
||||
for (y = 0; y < ypadsize; y++)
|
||||
{
|
||||
varbuf1[z][y][xpadsize - 1] = double4(0.0, 0.0, 0.0, 0.0);
|
||||
varbuf2[z][y][xpadsize - 1] = double4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
for (x = 0; x < xpadsize; x++)
|
||||
{
|
||||
varbuf1[z][ypadsize - 1][x] = double4(0.0, 0.0, 0.0, 0.0);
|
||||
varbuf2[z][ypadsize - 1][x] = double4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
if (use_z_axis)
|
||||
for (y = 0; y < ypadsize; y++)
|
||||
for (x = 0; x < xpadsize; x++)
|
||||
{
|
||||
varbuf1[zpadsize - 1][y][x] = double4(0.0, 0.0, 0.0, 0.0);
|
||||
varbuf2[zpadsize - 1][y][x] = double4(0.0, 0.0, 0.0, 0.0);
|
||||
}
|
||||
|
||||
|
||||
// generate summed-area tables for x and x2; this is done in-place
|
||||
for (z = 0; z < zpadsize; z++)
|
||||
for (y = 0; y < ypadsize; y++)
|
||||
{
|
||||
double4 summa1 = double4(0.0, 0.0, 0.0, 0.0);
|
||||
double4 summa2 = double4(0.0, 0.0, 0.0, 0.0);
|
||||
for (x = 0; x < xpadsize; x++)
|
||||
{
|
||||
double4 val1 = varbuf1[z][y][x];
|
||||
double4 val2 = varbuf2[z][y][x];
|
||||
varbuf1[z][y][x] = summa1;
|
||||
varbuf2[z][y][x] = summa2;
|
||||
summa1 = summa1 + val1;
|
||||
summa2 = summa2 + val2;
|
||||
}
|
||||
}
|
||||
|
||||
for (z = 0; z < zpadsize; z++)
|
||||
for (x = 0; x < xpadsize; x++)
|
||||
{
|
||||
double4 summa1 = double4(0.0, 0.0, 0.0, 0.0);
|
||||
double4 summa2 = double4(0.0, 0.0, 0.0, 0.0);
|
||||
for (y = 0; y < ypadsize; y++)
|
||||
{
|
||||
double4 val1 = varbuf1[z][y][x];
|
||||
double4 val2 = varbuf2[z][y][x];
|
||||
varbuf1[z][y][x] = summa1;
|
||||
varbuf2[z][y][x] = summa2;
|
||||
summa1 = summa1 + val1;
|
||||
summa2 = summa2 + val2;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_z_axis)
|
||||
for (y = 0; y < ypadsize; y++)
|
||||
for (x = 0; x < xpadsize; x++)
|
||||
{
|
||||
double4 summa1 = double4(0.0, 0.0, 0.0, 0.0);
|
||||
double4 summa2 = double4(0.0, 0.0, 0.0, 0.0);
|
||||
for (z = 0; z < zpadsize; z++)
|
||||
{
|
||||
double4 val1 = varbuf1[z][y][x];
|
||||
double4 val2 = varbuf2[z][y][x];
|
||||
varbuf1[z][y][x] = summa1;
|
||||
varbuf2[z][y][x] = summa2;
|
||||
summa1 = summa1 + val1;
|
||||
summa2 = summa2 + val2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int avg_var_kerneldim = 2 * avg_var_kernel_radius + 1;
|
||||
int alpha_kerneldim = 2 * alpha_kernel_radius + 1;
|
||||
|
||||
|
||||
// compute a few constants used in the variance-calculation.
|
||||
double avg_var_samples;
|
||||
double alpha_rsamples;
|
||||
double mul1;
|
||||
|
||||
if (use_z_axis)
|
||||
{
|
||||
avg_var_samples = avg_var_kerneldim * avg_var_kerneldim * avg_var_kerneldim;
|
||||
alpha_rsamples = 1.0 / (alpha_kerneldim * alpha_kerneldim * alpha_kerneldim);
|
||||
}
|
||||
else
|
||||
{
|
||||
avg_var_samples = avg_var_kerneldim * avg_var_kerneldim;
|
||||
alpha_rsamples = 1.0 / (alpha_kerneldim * alpha_kerneldim);
|
||||
}
|
||||
|
||||
|
||||
double avg_var_rsamples = 1.0 / avg_var_samples;
|
||||
if (avg_var_samples == 1)
|
||||
mul1 = 1.0;
|
||||
else
|
||||
mul1 = 1.0 / (avg_var_samples * (avg_var_samples - 1));
|
||||
|
||||
|
||||
double mul2 = avg_var_samples * mul1;
|
||||
|
||||
|
||||
// use the summed-area tables to compute variance for each sample-neighborhood
|
||||
if (use_z_axis)
|
||||
{
|
||||
for (z = 0; z < zsize; z++)
|
||||
{
|
||||
int z_src = z + kernel_radius;
|
||||
int z_dst = z + dest_zoffset;
|
||||
for (y = 0; y < ysize; y++)
|
||||
{
|
||||
int y_src = y + kernel_radius;
|
||||
int y_dst = y + dest_yoffset;
|
||||
|
||||
for (x = 0; x < xsize; x++)
|
||||
{
|
||||
int x_src = x + kernel_radius;
|
||||
int x_dst = x + dest_xoffset;
|
||||
|
||||
// summed-area table lookups for alpha average
|
||||
double vasum =
|
||||
(varbuf1[z_src + 1][y_src - alpha_kernel_radius][x_src - alpha_kernel_radius].w
|
||||
- varbuf1[z_src + 1][y_src - alpha_kernel_radius][x_src + alpha_kernel_radius + 1].w
|
||||
- varbuf1[z_src + 1][y_src + alpha_kernel_radius + 1][x_src - alpha_kernel_radius].w
|
||||
+ varbuf1[z_src + 1][y_src + alpha_kernel_radius + 1][x_src + alpha_kernel_radius + 1].w) -
|
||||
(varbuf1[z_src][y_src - alpha_kernel_radius][x_src - alpha_kernel_radius].w
|
||||
- varbuf1[z_src][y_src - alpha_kernel_radius][x_src + alpha_kernel_radius + 1].w
|
||||
- varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src - alpha_kernel_radius].w + varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src + alpha_kernel_radius + 1].w);
|
||||
input_alpha_averages[z_dst][y_dst][x_dst] = static_cast < float >(vasum * alpha_rsamples);
|
||||
|
||||
|
||||
// summed-area table lookups for RGBA average
|
||||
double4 v0sum =
|
||||
(varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius]
|
||||
+ varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]) -
|
||||
(varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]);
|
||||
|
||||
double4 avg = v0sum * avg_var_rsamples;
|
||||
|
||||
float4 favg = float4(static_cast < float >(avg.x),
|
||||
static_cast < float >(avg.y),
|
||||
static_cast < float >(avg.z),
|
||||
static_cast < float >(avg.w));
|
||||
input_averages[z_dst][y_dst][x_dst] = favg;
|
||||
|
||||
|
||||
// summed-area table lookups for variance
|
||||
double4 v1sum =
|
||||
(varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf1[z_src + 1][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius]
|
||||
+ varbuf1[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]) -
|
||||
(varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]);
|
||||
double4 v2sum =
|
||||
(varbuf2[z_src + 1][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf2[z_src + 1][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf2[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius]
|
||||
+ varbuf2[z_src + 1][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]) -
|
||||
(varbuf2[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf2[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1]);
|
||||
|
||||
// the actual variance
|
||||
double4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum);
|
||||
|
||||
float4 fvar = float4(static_cast < float >(variance.x),
|
||||
static_cast < float >(variance.y),
|
||||
static_cast < float >(variance.z),
|
||||
static_cast < float >(variance.w));
|
||||
input_variances[z_dst][y_dst][x_dst] = fvar;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (z = 0; z < zsize; z++)
|
||||
{
|
||||
int z_src = z;
|
||||
int z_dst = z + dest_zoffset;
|
||||
for (y = 0; y < ysize; y++)
|
||||
{
|
||||
int y_src = y + kernel_radius;
|
||||
int y_dst = y + dest_yoffset;
|
||||
|
||||
for (x = 0; x < xsize; x++)
|
||||
{
|
||||
int x_src = x + kernel_radius;
|
||||
int x_dst = x + dest_xoffset;
|
||||
|
||||
// summed-area table lookups for alpha average
|
||||
double vasum =
|
||||
varbuf1[z_src][y_src - alpha_kernel_radius][x_src - alpha_kernel_radius].w
|
||||
- varbuf1[z_src][y_src - alpha_kernel_radius][x_src + alpha_kernel_radius + 1].w
|
||||
- varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src - alpha_kernel_radius].w + varbuf1[z_src][y_src + alpha_kernel_radius + 1][x_src + alpha_kernel_radius + 1].w;
|
||||
input_alpha_averages[z_dst][y_dst][x_dst] = static_cast < float >(vasum * alpha_rsamples);
|
||||
|
||||
|
||||
// summed-area table lookups for RGBA average
|
||||
double4 v0sum =
|
||||
varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1];
|
||||
|
||||
double4 avg = v0sum * avg_var_rsamples;
|
||||
|
||||
float4 favg = float4(static_cast < float >(avg.x),
|
||||
static_cast < float >(avg.y),
|
||||
static_cast < float >(avg.z),
|
||||
static_cast < float >(avg.w));
|
||||
input_averages[z_dst][y_dst][x_dst] = favg;
|
||||
|
||||
|
||||
// summed-area table lookups for variance
|
||||
double4 v1sum =
|
||||
varbuf1[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf1[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf1[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1];
|
||||
double4 v2sum =
|
||||
varbuf2[z_src][y_src - avg_var_kernel_radius][x_src - avg_var_kernel_radius]
|
||||
- varbuf2[z_src][y_src - avg_var_kernel_radius][x_src + avg_var_kernel_radius + 1]
|
||||
- varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src - avg_var_kernel_radius] + varbuf2[z_src][y_src + avg_var_kernel_radius + 1][x_src + avg_var_kernel_radius + 1];
|
||||
|
||||
// the actual variance
|
||||
double4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum);
|
||||
|
||||
float4 fvar = float4(static_cast < float >(variance.x),
|
||||
static_cast < float >(variance.y),
|
||||
static_cast < float >(variance.z),
|
||||
static_cast < float >(variance.w));
|
||||
input_variances[z_dst][y_dst][x_dst] = fvar;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete[]varbuf2[0][0];
|
||||
delete[]varbuf1[0][0];
|
||||
delete[]varbuf2[0];
|
||||
delete[]varbuf1[0];
|
||||
delete[]varbuf2;
|
||||
delete[]varbuf1;
|
||||
}
|
||||
|
||||
|
||||
static void allocate_input_average_and_variance_buffers(int xsize, int ysize, int zsize)
|
||||
{
|
||||
int y, z;
|
||||
if (input_averages)
|
||||
{
|
||||
delete[]input_averages[0][0];
|
||||
delete[]input_averages[0];
|
||||
delete[]input_averages;
|
||||
}
|
||||
if (input_variances)
|
||||
{
|
||||
delete[]input_variances[0][0];
|
||||
delete[]input_variances[0];
|
||||
delete[]input_variances;
|
||||
}
|
||||
if (input_alpha_averages)
|
||||
{
|
||||
delete[]input_alpha_averages[0][0];
|
||||
delete[]input_alpha_averages[0];
|
||||
delete[]input_alpha_averages;
|
||||
}
|
||||
|
||||
input_averages = new float4 **[zsize];
|
||||
input_variances = new float4 **[zsize];
|
||||
input_alpha_averages = new float **[zsize];
|
||||
|
||||
|
||||
input_averages[0] = new float4 *[ysize * zsize];
|
||||
input_variances[0] = new float4 *[ysize * zsize];
|
||||
input_alpha_averages[0] = new float *[ysize * zsize];
|
||||
|
||||
input_averages[0][0] = new float4[xsize * ysize * zsize];
|
||||
input_variances[0][0] = new float4[xsize * ysize * zsize];
|
||||
input_alpha_averages[0][0] = new float[xsize * ysize * zsize];
|
||||
|
||||
for (z = 1; z < zsize; z++)
|
||||
{
|
||||
input_averages[z] = input_averages[0] + z * ysize;
|
||||
input_variances[z] = input_variances[0] + z * ysize;
|
||||
input_alpha_averages[z] = input_alpha_averages[0] + z * ysize;
|
||||
|
||||
input_averages[z][0] = input_averages[0][0] + z * ysize * xsize;
|
||||
input_variances[z][0] = input_variances[0][0] + z * ysize * xsize;
|
||||
input_alpha_averages[z][0] = input_alpha_averages[0][0] + z * ysize * xsize;
|
||||
}
|
||||
|
||||
for (z = 0; z < zsize; z++)
|
||||
for (y = 1; y < ysize; y++)
|
||||
{
|
||||
input_averages[z][y] = input_averages[z][0] + y * xsize;
|
||||
input_variances[z][y] = input_variances[z][0] + y * xsize;
|
||||
input_alpha_averages[z][y] = input_alpha_averages[z][0] + y * xsize;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// compute averages and variances for the current input image.
|
||||
void compute_averages_and_variances(const astc_codec_image * img, float rgb_power_to_use, float alpha_power_to_use, int avg_var_kernel_radius, int alpha_kernel_radius, swizzlepattern swz)
|
||||
{
|
||||
int xsize = img->xsize;
|
||||
int ysize = img->ysize;
|
||||
int zsize = img->zsize;
|
||||
allocate_input_average_and_variance_buffers(xsize, ysize, zsize);
|
||||
|
||||
|
||||
int x, y, z;
|
||||
for (z = 0; z < zsize; z += 32)
|
||||
{
|
||||
int zblocksize = MIN(32, zsize - z);
|
||||
for (y = 0; y < ysize; y += 32)
|
||||
{
|
||||
int yblocksize = MIN(32, ysize - y);
|
||||
for (x = 0; x < xsize; x += 32)
|
||||
{
|
||||
int xblocksize = MIN(32, xsize - x);
|
||||
compute_pixel_region_variance(img,
|
||||
rgb_power_to_use,
|
||||
alpha_power_to_use,
|
||||
swz,
|
||||
(zsize > 1),
|
||||
x + img->padding,
|
||||
y + img->padding, z + (zsize > 1 ? img->padding : 0), xblocksize, yblocksize, zblocksize, avg_var_kernel_radius, alpha_kernel_radius, x, y, z);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
317
3rdparty/astc/astc_decompress_symbolic.cpp
vendored
Normal file
317
3rdparty/astc/astc_decompress_symbolic.cpp
vendored
Normal file
@@ -0,0 +1,317 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Decompress a block of colors, expressed as a symbolic block,
|
||||
* for ASTC.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include "softfloat.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int compute_value_of_texel_int(int texel_to_get, const decimation_table * it, const int *weights)
|
||||
{
|
||||
int i;
|
||||
int summed_value = 8;
|
||||
int weights_to_evaluate = it->texel_num_weights[texel_to_get];
|
||||
for (i = 0; i < weights_to_evaluate; i++)
|
||||
{
|
||||
summed_value += weights[it->texel_weights[texel_to_get][i]] * it->texel_weights_int[texel_to_get][i];
|
||||
}
|
||||
return summed_value >> 4;
|
||||
}
|
||||
|
||||
|
||||
/*
    Integer interpolation between two endpoint colors.

    Each channel is computed as (c0 * (64 - w) + c1 * w + 32) >> 6, where w is
    'weight' for every channel except the one selected by
    plane2_color_component (0..3 = x, y, z, w), which uses 'plane2_weight'.
    Any other value of plane2_color_component (-1 in 1-plane mode) leaves all
    four channels on 'weight'.

    In DECODE_LDR_SRGB mode the endpoints are first reduced to their top
    8 bits, and the 8-bit result is replicated into both bytes of the 16-bit
    output.
*/
ushort4 lerp_color_int(astc_decode_mode decode_mode, ushort4 color0, ushort4 color1, int weight, int plane2_weight,
                       int plane2_color_component   // -1 in 1-plane mode
    )
{
    const bool srgb = (decode_mode == DECODE_LDR_SRGB);

    // Per-channel weight of color1; the second-plane channel gets its own weight.
    int4 weight_hi = int4(weight, weight, weight, weight);
    if (plane2_color_component == 0)
        weight_hi.x = plane2_weight;
    else if (plane2_color_component == 1)
        weight_hi.y = plane2_weight;
    else if (plane2_color_component == 2)
        weight_hi.z = plane2_weight;
    else if (plane2_color_component == 3)
        weight_hi.w = plane2_weight;

    // Weight of color0 is the complement to 64.
    int4 weight_lo = int4(64, 64, 64, 64) - weight_hi;

    int4 low = int4(color0.x, color0.y, color0.z, color0.w);
    int4 high = int4(color1.x, color1.y, color1.z, color1.w);
    if (srgb)
    {
        low = low >> 8;
        high = high >> 8;
    }

    // Weighted sum with +32 so that the shift by 6 rounds to nearest.
    int4 blended = (low * weight_lo) + (high * weight_hi) + int4(32, 32, 32, 32);
    blended = blended >> 6;

    if (srgb)
        blended = blended | (blended << 8);

    return ushort4(blended.x, blended.y, blended.z, blended.w);
}
|
||||
|
||||
|
||||
void decompress_symbolic_block(astc_decode_mode decode_mode,
|
||||
int xdim, int ydim, int zdim, // dimensions of block
|
||||
int xpos, int ypos, int zpos, // position of block
|
||||
const symbolic_compressed_block * scb, imageblock * blk)
|
||||
{
|
||||
blk->xpos = xpos;
|
||||
blk->ypos = ypos;
|
||||
blk->zpos = zpos;
|
||||
|
||||
int i;
|
||||
|
||||
// if we detected an error-block, blow up immediately.
|
||||
if (scb->error_block)
|
||||
{
|
||||
if (decode_mode == DECODE_LDR_SRGB)
|
||||
{
|
||||
for (i = 0; i < xdim * ydim * zdim; i++)
|
||||
{
|
||||
blk->orig_data[4 * i] = 1.0f;
|
||||
blk->orig_data[4 * i + 1] = 0.0f;
|
||||
blk->orig_data[4 * i + 2] = 1.0f;
|
||||
blk->orig_data[4 * i + 3] = 1.0f;
|
||||
blk->rgb_lns[i] = 0;
|
||||
blk->alpha_lns[i] = 0;
|
||||
blk->nan_texel[i] = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < xdim * ydim * zdim; i++)
|
||||
{
|
||||
blk->orig_data[4 * i] = 0.0f;
|
||||
blk->orig_data[4 * i + 1] = 0.0f;
|
||||
blk->orig_data[4 * i + 2] = 0.0f;
|
||||
blk->orig_data[4 * i + 3] = 0.0f;
|
||||
blk->rgb_lns[i] = 0;
|
||||
blk->alpha_lns[i] = 0;
|
||||
blk->nan_texel[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
imageblock_initialize_work_from_orig(blk, xdim * ydim * zdim);
|
||||
update_imageblock_flags(blk, xdim, ydim, zdim);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if (scb->block_mode < 0)
|
||||
{
|
||||
float red = 0, green = 0, blue = 0, alpha = 0;
|
||||
int use_lns = 0;
|
||||
int use_nan = 0;
|
||||
|
||||
if (scb->block_mode == -2)
|
||||
{
|
||||
// For sRGB decoding, we should return only the top 8 bits.
|
||||
int mask = (decode_mode == DECODE_LDR_SRGB) ? 0xFF00 : 0xFFFF;
|
||||
|
||||
red = sf16_to_float(unorm16_to_sf16(scb->constant_color[0] & mask));
|
||||
green = sf16_to_float(unorm16_to_sf16(scb->constant_color[1] & mask));
|
||||
blue = sf16_to_float(unorm16_to_sf16(scb->constant_color[2] & mask));
|
||||
alpha = sf16_to_float(unorm16_to_sf16(scb->constant_color[3] & mask));
|
||||
use_lns = 0;
|
||||
use_nan = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (decode_mode)
|
||||
{
|
||||
case DECODE_LDR_SRGB:
|
||||
red = 1.0f;
|
||||
green = 0.0f;
|
||||
blue = 1.0f;
|
||||
alpha = 1.0f;
|
||||
use_lns = 0;
|
||||
use_nan = 0;
|
||||
break;
|
||||
case DECODE_LDR:
|
||||
red = 0.0f;
|
||||
green = 0.0f;
|
||||
blue = 0.0f;
|
||||
alpha = 0.0f;
|
||||
use_lns = 0;
|
||||
use_nan = 1;
|
||||
break;
|
||||
case DECODE_HDR:
|
||||
// constant-color block; unpack from FP16 to FP32.
|
||||
red = sf16_to_float(scb->constant_color[0]);
|
||||
green = sf16_to_float(scb->constant_color[1]);
|
||||
blue = sf16_to_float(scb->constant_color[2]);
|
||||
alpha = sf16_to_float(scb->constant_color[3]);
|
||||
use_lns = 1;
|
||||
use_nan = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < xdim * ydim * zdim; i++)
|
||||
{
|
||||
blk->orig_data[4 * i] = red;
|
||||
blk->orig_data[4 * i + 1] = green;
|
||||
blk->orig_data[4 * i + 2] = blue;
|
||||
blk->orig_data[4 * i + 3] = alpha;
|
||||
blk->rgb_lns[i] = use_lns;
|
||||
blk->alpha_lns[i] = use_lns;
|
||||
blk->nan_texel[i] = use_nan;
|
||||
}
|
||||
|
||||
|
||||
imageblock_initialize_work_from_orig(blk, xdim * ydim * zdim);
|
||||
update_imageblock_flags(blk, xdim, ydim, zdim);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// get the appropriate partition-table entry
|
||||
int partition_count = scb->partition_count;
|
||||
const partition_info *pt = get_partition_table(xdim, ydim, zdim, partition_count);
|
||||
pt += scb->partition_index;
|
||||
|
||||
// get the appropriate block descriptor
|
||||
const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
|
||||
const decimation_table *const *ixtab2 = bsd->decimation_tables;
|
||||
|
||||
|
||||
const decimation_table *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];
|
||||
|
||||
int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;
|
||||
|
||||
int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;
|
||||
|
||||
|
||||
// decode the color endpoints
|
||||
ushort4 color_endpoint0[4];
|
||||
ushort4 color_endpoint1[4];
|
||||
int rgb_hdr_endpoint[4];
|
||||
int alpha_hdr_endpoint[4];
|
||||
int nan_endpoint[4];
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
unpack_color_endpoints(decode_mode,
|
||||
scb->color_formats[i],
|
||||
scb->color_quantization_level, scb->color_values[i], &(rgb_hdr_endpoint[i]), &(alpha_hdr_endpoint[i]), &(nan_endpoint[i]), &(color_endpoint0[i]), &(color_endpoint1[i]));
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// first unquantize the weights
|
||||
int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
|
||||
int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
|
||||
int weight_count = it->num_weights;
|
||||
|
||||
|
||||
const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]);
|
||||
|
||||
for (i = 0; i < weight_count; i++)
|
||||
{
|
||||
uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]];
|
||||
}
|
||||
if (is_dual_plane)
|
||||
{
|
||||
for (i = 0; i < weight_count; i++)
|
||||
uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]];
|
||||
}
|
||||
|
||||
|
||||
// then undecimate them.
|
||||
int weights[MAX_TEXELS_PER_BLOCK];
|
||||
int plane2_weights[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights);
|
||||
|
||||
if (is_dual_plane)
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights);
|
||||
|
||||
|
||||
int plane2_color_component = scb->plane2_color_component;
|
||||
|
||||
|
||||
// now that we have endpoint colors and weights, we can unpack actual colors for
|
||||
// each texel.
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
int partition = pt->partition_of_texel[i];
|
||||
|
||||
ushort4 color = lerp_color_int(decode_mode,
|
||||
color_endpoint0[partition],
|
||||
color_endpoint1[partition],
|
||||
weights[i],
|
||||
plane2_weights[i],
|
||||
is_dual_plane ? plane2_color_component : -1);
|
||||
|
||||
blk->rgb_lns[i] = rgb_hdr_endpoint[partition];
|
||||
blk->alpha_lns[i] = alpha_hdr_endpoint[partition];
|
||||
blk->nan_texel[i] = nan_endpoint[partition];
|
||||
|
||||
blk->work_data[4 * i] = color.x;
|
||||
blk->work_data[4 * i + 1] = color.y;
|
||||
blk->work_data[4 * i + 2] = color.z;
|
||||
blk->work_data[4 * i + 3] = color.w;
|
||||
}
|
||||
|
||||
imageblock_initialize_orig_from_work(blk, xdim * ydim * zdim);
|
||||
|
||||
update_imageblock_flags(blk, xdim, ydim, zdim);
|
||||
}
|
||||
|
||||
|
||||
|
||||
float compute_imageblock_difference(int xdim, int ydim, int zdim, const imageblock * p1, const imageblock * p2, const error_weight_block * ewb)
|
||||
{
|
||||
int i;
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
float summa = 0.0f;
|
||||
const float *f1 = p1->work_data;
|
||||
const float *f2 = p2->work_data;
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float rdiff = fabsf(f1[4 * i] - f2[4 * i]);
|
||||
float gdiff = fabs(f1[4 * i + 1] - f2[4 * i + 1]);
|
||||
float bdiff = fabs(f1[4 * i + 2] - f2[4 * i + 2]);
|
||||
float adiff = fabs(f1[4 * i + 3] - f2[4 * i + 3]);
|
||||
rdiff = MIN(rdiff, 1e15f);
|
||||
gdiff = MIN(gdiff, 1e15f);
|
||||
bdiff = MIN(bdiff, 1e15f);
|
||||
adiff = MIN(adiff, 1e15f);
|
||||
|
||||
summa += rdiff * rdiff * ewb->error_weights[i].x + gdiff * gdiff * ewb->error_weights[i].y + bdiff * bdiff * ewb->error_weights[i].z + adiff * adiff * ewb->error_weights[i].w;
|
||||
}
|
||||
|
||||
return summa;
|
||||
}
|
||||
310
3rdparty/astc/astc_encoding_choice_error.cpp
vendored
Normal file
310
3rdparty/astc/astc_encoding_choice_error.cpp
vendored
Normal file
@@ -0,0 +1,310 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Determine color errors for ASTC compression.
|
||||
*
|
||||
* We assume that there are two independent sources of color error in
|
||||
* any given partition.
|
||||
*
|
||||
* These are:
|
||||
* * quantization errors
|
||||
* * encoding choice errors
|
||||
*
|
||||
* Encoding choice errors are errors that come due to encoding choice,
|
||||
* such as:
|
||||
* * using luminance instead of RGB
|
||||
* * using RGB-scale instead of two RGB endpoints.
|
||||
* * dropping Alpha
|
||||
*
|
||||
* Quantization errors occur due to the limited precision we use for
|
||||
* storing numbers.
|
||||
*
|
||||
* Quantization errors generally scale with quantization level, but are
|
||||
* not actually independent of color encoding. In particular:
|
||||
* * if we can use offset encoding then quantization error is halved.
|
||||
* * if we can use blue-contraction, quantization error for red and
|
||||
* green is halved.
|
||||
* * quantization error is higher for the HDR endpoint modes.
|
||||
*
|
||||
* Other than these errors, quantization error is assumed to be
|
||||
* proportional to the quantization step.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
// helper function to merge two endpoint-colors
|
||||
void merge_endpoints(const endpoints * ep1, // contains three of the color components
|
||||
const endpoints * ep2, // contains the remaining color component
|
||||
int separate_component, endpoints * res)
|
||||
{
|
||||
int i;
|
||||
int partition_count = ep1->partition_count;
|
||||
res->partition_count = partition_count;
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
res->endpt0[i] = ep1->endpt0[i];
|
||||
res->endpt1[i] = ep1->endpt1[i];
|
||||
}
|
||||
|
||||
switch (separate_component)
|
||||
{
|
||||
case 0:
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
res->endpt0[i].x = ep2->endpt0[i].x;
|
||||
res->endpt1[i].x = ep2->endpt1[i].x;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
res->endpt0[i].y = ep2->endpt0[i].y;
|
||||
res->endpt1[i].y = ep2->endpt1[i].y;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
res->endpt0[i].z = ep2->endpt0[i].z;
|
||||
res->endpt1[i].z = ep2->endpt1[i].z;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
res->endpt0[i].w = ep2->endpt0[i].w;
|
||||
res->endpt1[i].w = ep2->endpt1[i].w;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
for a given set of input colors and a given partitioning, determine: color error that results
|
||||
from RGB-scale encoding (relevant for LDR only) color error that results from RGB-lumashift encoding
|
||||
(relevant for HDR only) color error that results from luminance-encoding color error that results
|
||||
form dropping alpha. whether we are eligible for offset encoding whether we are eligible for
|
||||
blue-contraction
|
||||
|
||||
The input data are: color data partitioning error-weight data
|
||||
*/
|
||||
|
||||
|
||||
void compute_encoding_choice_errors(int xdim, int ydim, int zdim, const imageblock * pb, const partition_info * pi, const error_weight_block * ewb,
|
||||
int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode
|
||||
encoding_choice_errors * eci)
|
||||
{
|
||||
int i;
|
||||
|
||||
int partition_count = pi->partition_count;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
printf("%s : texels-per-block=%dx%dx%d, separate_component=%d, partition-count=%d\n", __func__, xdim, ydim, zdim, separate_component, partition_count);
|
||||
}
|
||||
#endif
|
||||
|
||||
float3 averages[4];
|
||||
float3 directions_rgb[4];
|
||||
float2 directions_rg[4];
|
||||
float2 directions_rb[4];
|
||||
float2 directions_gb[4];
|
||||
|
||||
float4 error_weightings[4];
|
||||
float4 color_scalefactors[4];
|
||||
float4 inverse_color_scalefactors[4];
|
||||
|
||||
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pi, error_weightings, color_scalefactors);
|
||||
|
||||
compute_averages_and_directions_rgb(pi, pb, ewb, color_scalefactors, averages, directions_rgb, directions_rg, directions_rb, directions_gb);
|
||||
|
||||
line3 uncorr_rgb_lines[4];
|
||||
line3 samechroma_rgb_lines[4]; // for LDR-RGB-scale
|
||||
line3 rgb_luma_lines[4]; // for HDR-RGB-scale
|
||||
line3 luminance_lines[4];
|
||||
|
||||
processed_line3 proc_uncorr_rgb_lines[4];
|
||||
processed_line3 proc_samechroma_rgb_lines[4]; // for LDR-RGB-scale
|
||||
processed_line3 proc_rgb_luma_lines[4]; // for HDR-RGB-scale
|
||||
processed_line3 proc_luminance_lines[4];
|
||||
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
inverse_color_scalefactors[i].x = 1.0f / MAX(color_scalefactors[i].x, 1e-7f);
|
||||
inverse_color_scalefactors[i].y = 1.0f / MAX(color_scalefactors[i].y, 1e-7f);
|
||||
inverse_color_scalefactors[i].z = 1.0f / MAX(color_scalefactors[i].z, 1e-7f);
|
||||
inverse_color_scalefactors[i].w = 1.0f / MAX(color_scalefactors[i].w, 1e-7f);
|
||||
|
||||
|
||||
uncorr_rgb_lines[i].a = averages[i];
|
||||
if (dot(directions_rgb[i], directions_rgb[i]) == 0.0f)
|
||||
uncorr_rgb_lines[i].b = normalize(float3(color_scalefactors[i].xyz));
|
||||
else
|
||||
uncorr_rgb_lines[i].b = normalize(directions_rgb[i]);
|
||||
|
||||
samechroma_rgb_lines[i].a = float3(0, 0, 0);
|
||||
if (dot(averages[i], averages[i]) < 1e-20)
|
||||
samechroma_rgb_lines[i].b = normalize(float3(color_scalefactors[i].xyz));
|
||||
else
|
||||
samechroma_rgb_lines[i].b = normalize(averages[i]);
|
||||
|
||||
rgb_luma_lines[i].a = averages[i];
|
||||
rgb_luma_lines[i].b = normalize(color_scalefactors[i].xyz);
|
||||
|
||||
luminance_lines[i].a = float3(0, 0, 0);
|
||||
luminance_lines[i].b = normalize(color_scalefactors[i].xyz);
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
printf("Partition %d\n", i);
|
||||
printf("Average = <%g %g %g>\n", averages[i].x, averages[i].y, averages[i].z);
|
||||
printf("Uncorr-rgb-line = <%g %g %g> + t<%g %g %g>\n",
|
||||
uncorr_rgb_lines[i].a.x, uncorr_rgb_lines[i].a.y, uncorr_rgb_lines[i].a.z, uncorr_rgb_lines[i].b.x, uncorr_rgb_lines[i].b.y, uncorr_rgb_lines[i].b.z);
|
||||
printf("Samechroma-line = t<%g %g %g>\n", samechroma_rgb_lines[i].b.x, samechroma_rgb_lines[i].b.y, samechroma_rgb_lines[i].b.z);
|
||||
}
|
||||
#endif
|
||||
|
||||
proc_uncorr_rgb_lines[i].amod = (uncorr_rgb_lines[i].a - uncorr_rgb_lines[i].b * dot(uncorr_rgb_lines[i].a, uncorr_rgb_lines[i].b)) * inverse_color_scalefactors[i].xyz;
|
||||
proc_uncorr_rgb_lines[i].bs = uncorr_rgb_lines[i].b * color_scalefactors[i].xyz;
|
||||
proc_uncorr_rgb_lines[i].bis = uncorr_rgb_lines[i].b * inverse_color_scalefactors[i].xyz;
|
||||
|
||||
proc_samechroma_rgb_lines[i].amod = (samechroma_rgb_lines[i].a - samechroma_rgb_lines[i].b * dot(samechroma_rgb_lines[i].a, samechroma_rgb_lines[i].b)) * inverse_color_scalefactors[i].xyz;
|
||||
proc_samechroma_rgb_lines[i].bs = samechroma_rgb_lines[i].b * color_scalefactors[i].xyz;
|
||||
proc_samechroma_rgb_lines[i].bis = samechroma_rgb_lines[i].b * inverse_color_scalefactors[i].xyz;
|
||||
|
||||
proc_rgb_luma_lines[i].amod = (rgb_luma_lines[i].a - rgb_luma_lines[i].b * dot(rgb_luma_lines[i].a, rgb_luma_lines[i].b)) * inverse_color_scalefactors[i].xyz;
|
||||
proc_rgb_luma_lines[i].bs = rgb_luma_lines[i].b * color_scalefactors[i].xyz;
|
||||
proc_rgb_luma_lines[i].bis = rgb_luma_lines[i].b * inverse_color_scalefactors[i].xyz;
|
||||
|
||||
proc_luminance_lines[i].amod = (luminance_lines[i].a - luminance_lines[i].b * dot(luminance_lines[i].a, luminance_lines[i].b)) * inverse_color_scalefactors[i].xyz;
|
||||
proc_luminance_lines[i].bs = luminance_lines[i].b * color_scalefactors[i].xyz;
|
||||
proc_luminance_lines[i].bis = luminance_lines[i].b * inverse_color_scalefactors[i].xyz;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
float uncorr_rgb_error[4];
|
||||
float samechroma_rgb_error[4];
|
||||
float rgb_luma_error[4];
|
||||
float luminance_rgb_error[4];
|
||||
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
|
||||
uncorr_rgb_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_uncorr_rgb_lines[i]));
|
||||
|
||||
samechroma_rgb_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_samechroma_rgb_lines[i]));
|
||||
|
||||
rgb_luma_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_rgb_luma_lines[i]));
|
||||
|
||||
luminance_rgb_error[i] = compute_error_squared_rgb_single_partition(i, xdim, ydim, zdim, pi, pb, ewb, &(proc_luminance_lines[i]));
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
printf("Partition %d : uncorr-error=%g samechroma-error=%g rgb-luma-error=%g lum-error=%g\n",
|
||||
i, uncorr_rgb_error[i], samechroma_rgb_error[i], rgb_luma_error[i], luminance_rgb_error[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// compute the error that arises from just ditching alpha and RGB
|
||||
float alpha_drop_error[4];
|
||||
float rgb_drop_error[4];
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
alpha_drop_error[i] = 0;
|
||||
rgb_drop_error[i] = 0;
|
||||
}
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
int partition = pi->partition_of_texel[i];
|
||||
float alpha = pb->work_data[4 * i + 3];
|
||||
float default_alpha = pb->alpha_lns[i] ? (float)0x7800 : (float)0xFFFF;
|
||||
|
||||
float omalpha = alpha - default_alpha;
|
||||
alpha_drop_error[partition] += omalpha * omalpha * ewb->error_weights[i].w;
|
||||
float red = pb->work_data[4 * i];
|
||||
float green = pb->work_data[4 * i + 1];
|
||||
float blue = pb->work_data[4 * i + 2];
|
||||
rgb_drop_error[partition] += red * red * ewb->error_weights[i].x + green * green * ewb->error_weights[i].y + blue * blue * ewb->error_weights[i].z;
|
||||
}
|
||||
|
||||
// check if we are eligible for blue-contraction and offset-encoding
|
||||
|
||||
endpoints ep;
|
||||
if (separate_component == -1)
|
||||
{
|
||||
endpoints_and_weights ei;
|
||||
compute_endpoints_and_ideal_weights_1_plane(xdim, ydim, zdim, pi, pb, ewb, &ei);
|
||||
ep = ei.ep;
|
||||
}
|
||||
else
|
||||
{
|
||||
endpoints_and_weights ei1, ei2;
|
||||
compute_endpoints_and_ideal_weights_2_planes(xdim, ydim, zdim, pi, pb, ewb, separate_component, &ei1, &ei2);
|
||||
|
||||
merge_endpoints(&(ei1.ep), &(ei2.ep), separate_component, &ep);
|
||||
}
|
||||
|
||||
int eligible_for_offset_encode[4];
|
||||
int eligible_for_blue_contraction[4];
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
float4 endpt0 = ep.endpt0[i];
|
||||
float4 endpt1 = ep.endpt1[i];
|
||||
float4 endpt_dif = endpt1 - endpt0;
|
||||
if (fabs(endpt_dif.x) < (0.12 * 65535.0f) && fabs(endpt_dif.y) < (0.12 * 65535.0f) && fabs(endpt_dif.z) < (0.12 * 65535.0f))
|
||||
eligible_for_offset_encode[i] = 1;
|
||||
else
|
||||
eligible_for_offset_encode[i] = 0;
|
||||
endpt0.x += (endpt0.x - endpt0.z);
|
||||
endpt0.y += (endpt0.y - endpt0.z);
|
||||
endpt1.x += (endpt1.x - endpt1.z);
|
||||
endpt1.y += (endpt1.y - endpt1.z);
|
||||
if (endpt0.x > (0.01f * 65535.0f) && endpt0.x < (0.99f * 65535.0f)
|
||||
&& endpt1.x > (0.01f * 65535.0f) && endpt1.x < (0.99f * 65535.0f)
|
||||
&& endpt0.y > (0.01f * 65535.0f) && endpt0.y < (0.99f * 65535.0f) && endpt1.y > (0.01f * 65535.0f) && endpt1.y < (0.99f * 65535.0f))
|
||||
eligible_for_blue_contraction[i] = 1;
|
||||
else
|
||||
eligible_for_blue_contraction[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
// finally, gather up our results
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
eci[i].rgb_scale_error = (samechroma_rgb_error[i] - uncorr_rgb_error[i]) * 0.7f; // empirical
|
||||
eci[i].rgb_luma_error = (rgb_luma_error[i] - uncorr_rgb_error[i]) * 1.5f; // wild guess
|
||||
eci[i].luminance_error = (luminance_rgb_error[i] - uncorr_rgb_error[i]) * 3.0f; // empirical
|
||||
eci[i].alpha_drop_error = alpha_drop_error[i] * 3.0f;
|
||||
eci[i].rgb_drop_error = rgb_drop_error[i] * 3.0f;
|
||||
eci[i].can_offset_encode = eligible_for_offset_encode[i];
|
||||
eci[i].can_blue_contract = eligible_for_blue_contraction[i];
|
||||
}
|
||||
}
|
||||
865
3rdparty/astc/astc_find_best_partitioning.cpp
vendored
Normal file
865
3rdparty/astc/astc_find_best_partitioning.cpp
vendored
Normal file
@@ -0,0 +1,865 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief ASTC encoding of texture
|
||||
*
|
||||
* major step 1:
|
||||
* * find best partitioning assuming uncorrelated colors
|
||||
* * find best partitioning assuming RGBS color representation
|
||||
*
|
||||
* finding best partitioning for a block:
|
||||
* * for each available partitioning:
|
||||
* * compute mean-color-value and dominant direction.
|
||||
* * this defines two lines, both of which go through the
|
||||
* mean-color-value:
|
||||
* * one line has a direction defined by the dominant direction;
|
||||
* this line is used to assess the error from using an uncorrelated
|
||||
* color representation.
|
||||
* * the other line goes through (0,0,0,1) and is used to assess the
|
||||
* error from using an RGBS color representation.
|
||||
* * we then compute, as a sum across the block, the squared-errors
|
||||
* that result from using the dominant-direction-lines and the
|
||||
* squared-errors that result from using the 0001-lines.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* Partition table representation:
|
||||
* We have 3 tables, each with 1024 partitionings
|
||||
* (these correspond to the 3x128 hardware partitionings crossed with all the
|
||||
* partition-transform modes in the hardware.)
|
||||
*
|
||||
* For each partitioning, we have:
|
||||
* * a 4-entry table indicating how many texels there are in each of the 4
|
||||
* partitions. this may be from 2 to about 60 or so.
|
||||
* * a 64-entry table indicating the partition index of each of the 64 texels
|
||||
* in the block. each index may be 0, 1, 2 or 3.
|
||||
*
|
||||
* each element in the table is a uint8_t indicating the partition index (0, 1, 2 or 3)
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "mathlib.h"
|
||||
|
||||
int imageblock_uses_alpha(int xdim, int ydim, int zdim, const imageblock * pb)
|
||||
{
|
||||
IGNORE(xdim);
|
||||
IGNORE(ydim);
|
||||
IGNORE(zdim);
|
||||
|
||||
return pb->alpha_max != pb->alpha_min;
|
||||
}
|
||||
|
||||
static void compute_alpha_minmax(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, float *alpha_min, float *alpha_max)
|
||||
{
|
||||
int i;
|
||||
int partition_count = pt->partition_count;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
alpha_min[i] = 1e38f;
|
||||
alpha_max[i] = -1e38f;
|
||||
}
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
if (ewb->texel_weight[i] > 1e-10)
|
||||
{
|
||||
int partition = pt->partition_of_texel[i];
|
||||
float alphaval = blk->work_data[4 * i + 3];
|
||||
if (alphaval > alpha_max[partition])
|
||||
alpha_max[partition] = alphaval;
|
||||
if (alphaval < alpha_min[partition])
|
||||
alpha_min[partition] = alphaval;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
if (alpha_min[i] >= alpha_max[i])
|
||||
{
|
||||
alpha_min[i] = 0;
|
||||
alpha_max[i] = 1e-10f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void compute_rgb_minmax(int xdim,
|
||||
int ydim,
|
||||
int zdim,
|
||||
const partition_info * pt,
|
||||
const imageblock * blk, const error_weight_block * ewb, float *red_min, float *red_max, float *green_min, float *green_max, float *blue_min, float *blue_max)
|
||||
{
|
||||
int i;
|
||||
int partition_count = pt->partition_count;
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
red_min[i] = 1e38f;
|
||||
red_max[i] = -1e38f;
|
||||
green_min[i] = 1e38f;
|
||||
green_max[i] = -1e38f;
|
||||
blue_min[i] = 1e38f;
|
||||
blue_max[i] = -1e38f;
|
||||
}
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
if (ewb->texel_weight[i] > 1e-10f)
|
||||
{
|
||||
int partition = pt->partition_of_texel[i];
|
||||
float redval = blk->work_data[4 * i];
|
||||
float greenval = blk->work_data[4 * i + 1];
|
||||
float blueval = blk->work_data[4 * i + 2];
|
||||
if (redval > red_max[partition])
|
||||
red_max[partition] = redval;
|
||||
if (redval < red_min[partition])
|
||||
red_min[partition] = redval;
|
||||
if (greenval > green_max[partition])
|
||||
green_max[partition] = greenval;
|
||||
if (greenval < green_min[partition])
|
||||
green_min[partition] = greenval;
|
||||
if (blueval > blue_max[partition])
|
||||
blue_max[partition] = blueval;
|
||||
if (blueval < blue_min[partition])
|
||||
blue_min[partition] = blueval;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
if (red_min[i] >= red_max[i])
|
||||
{
|
||||
red_min[i] = 0.0f;
|
||||
red_max[i] = 1e-10f;
|
||||
}
|
||||
if (green_min[i] >= green_max[i])
|
||||
{
|
||||
green_min[i] = 0.0f;
|
||||
green_max[i] = 1e-10f;
|
||||
}
|
||||
if (blue_min[i] >= blue_max[i])
|
||||
{
|
||||
blue_min[i] = 0.0f;
|
||||
blue_max[i] = 1e-10f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void compute_partition_error_color_weightings(int xdim, int ydim, int zdim, const error_weight_block * ewb, const partition_info * pi, float4 error_weightings[4], float4 color_scalefactors[4])
|
||||
{
|
||||
int i;
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
int pcnt = pi->partition_count;
|
||||
for (i = 0; i < pcnt; i++)
|
||||
error_weightings[i] = float4(1e-12f, 1e-12f, 1e-12f, 1e-12f);
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
int part = pi->partition_of_texel[i];
|
||||
error_weightings[part] = error_weightings[part] + ewb->error_weights[i];
|
||||
}
|
||||
for (i = 0; i < pcnt; i++)
|
||||
{
|
||||
error_weightings[i] = error_weightings[i] * (1.0f / pi->texels_per_partition[i]);
|
||||
}
|
||||
for (i = 0; i < pcnt; i++)
|
||||
{
|
||||
color_scalefactors[i].x = sqrt(error_weightings[i].x);
|
||||
color_scalefactors[i].y = sqrt(error_weightings[i].y);
|
||||
color_scalefactors[i].z = sqrt(error_weightings[i].z);
|
||||
color_scalefactors[i].w = sqrt(error_weightings[i].w);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
main function to identify the best partitioning for a given number of texels */
|
||||
|
||||
|
||||
void find_best_partitionings(int partition_search_limit, int xdim, int ydim, int zdim, int partition_count,
|
||||
const imageblock * pb, const error_weight_block * ewb, int candidates_to_return,
|
||||
// best partitionings to use if the endpoint colors are assumed to be uncorrelated
|
||||
int *best_partitions_uncorrellated,
|
||||
// best partitionings to use if the endpoint colors have the same chroma
|
||||
int *best_partitions_samechroma,
|
||||
// best partitionings to use if using dual plane of weights
|
||||
int *best_partitions_dual_weight_planes)
|
||||
{
|
||||
|
||||
|
||||
int i, j;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
// constant used to estimate quantization error for a given partitioning;
|
||||
// the optimal value for this constant depends on bitrate.
|
||||
// These constants have been determined empirically.
|
||||
|
||||
float weight_imprecision_estim = 100;
|
||||
|
||||
if (texels_per_block <= 20)
|
||||
weight_imprecision_estim = 0.03f;
|
||||
else if (texels_per_block <= 31)
|
||||
weight_imprecision_estim = 0.04f;
|
||||
else if (texels_per_block <= 41)
|
||||
weight_imprecision_estim = 0.05f;
|
||||
else
|
||||
weight_imprecision_estim = 0.055f;
|
||||
|
||||
|
||||
int partition_sequence[PARTITION_COUNT];
|
||||
|
||||
kmeans_compute_partition_ordering(xdim, ydim, zdim, partition_count, pb, partition_sequence);
|
||||
|
||||
|
||||
float weight_imprecision_estim_squared = weight_imprecision_estim * weight_imprecision_estim;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("weight_imprecision_estim = %g\n", weight_imprecision_estim);
|
||||
#endif
|
||||
|
||||
int uses_alpha = imageblock_uses_alpha(xdim, ydim, zdim, pb);
|
||||
|
||||
const partition_info *ptab = get_partition_table(xdim, ydim, zdim, partition_count);
|
||||
|
||||
// partitioning errors assuming uncorrelated-chrominance endpoints
|
||||
float uncorr_errors[PARTITION_COUNT];
|
||||
// partitioning errors assuming same-chrominance endpoints
|
||||
float samechroma_errors[PARTITION_COUNT];
|
||||
|
||||
// partitioning errors assuming that one of the color channels
|
||||
// is uncorrelated from all the other ones
|
||||
float separate_errors[4 * PARTITION_COUNT];
|
||||
|
||||
|
||||
float *separate_red_errors = separate_errors;
|
||||
float *separate_green_errors = separate_errors + PARTITION_COUNT;
|
||||
float *separate_blue_errors = separate_errors + 2 * PARTITION_COUNT;
|
||||
float *separate_alpha_errors = separate_errors + 3 * PARTITION_COUNT;
|
||||
|
||||
int defacto_search_limit = PARTITION_COUNT - 1;
|
||||
|
||||
if (uses_alpha)
|
||||
{
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partition testing with alpha, %d partitions\n\n", partition_count);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
int partition = partition_sequence[i];
|
||||
int bk_partition_count = ptab[partition].partition_count;
|
||||
|
||||
if (bk_partition_count < partition_count)
|
||||
{
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partitioning %d-%d: invalid\n", partition_count, partition);
|
||||
#endif
|
||||
|
||||
uncorr_errors[i] = 1e35f;
|
||||
samechroma_errors[i] = 1e35f;
|
||||
separate_red_errors[i] = 1e35f;
|
||||
separate_green_errors[i] = 1e35f;
|
||||
separate_blue_errors[i] = 1e35f;
|
||||
separate_alpha_errors[i] = 1e35f;
|
||||
continue;
|
||||
}
|
||||
// the sentinel value for partitions above the search limit must be smaller
|
||||
// than the sentinel value for invalid partitions
|
||||
if (i >= partition_search_limit)
|
||||
{
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partitioning %d-%d: excluded from testing\n", partition_count, partition);
|
||||
#endif
|
||||
|
||||
defacto_search_limit = i;
|
||||
|
||||
uncorr_errors[i] = 1e34f;
|
||||
samechroma_errors[i] = 1e34f;
|
||||
separate_red_errors[i] = 1e34f;
|
||||
separate_green_errors[i] = 1e34f;
|
||||
separate_blue_errors[i] = 1e34f;
|
||||
separate_alpha_errors[i] = 1e34f;
|
||||
break;
|
||||
}
|
||||
|
||||
// compute the weighting to give to each color channel
|
||||
// in each partition.
|
||||
float4 error_weightings[4];
|
||||
float4 color_scalefactors[4];
|
||||
float4 inverse_color_scalefactors[4];
|
||||
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, ptab + partition, error_weightings, color_scalefactors);
|
||||
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
inverse_color_scalefactors[j].x = 1.0f / MAX(color_scalefactors[j].x, 1e-7f);
|
||||
inverse_color_scalefactors[j].y = 1.0f / MAX(color_scalefactors[j].y, 1e-7f);
|
||||
inverse_color_scalefactors[j].z = 1.0f / MAX(color_scalefactors[j].z, 1e-7f);
|
||||
inverse_color_scalefactors[j].w = 1.0f / MAX(color_scalefactors[j].w, 1e-7f);
|
||||
}
|
||||
|
||||
float4 averages[4];
|
||||
float4 directions_rgba[4];
|
||||
float3 directions_gba[4];
|
||||
float3 directions_rba[4];
|
||||
float3 directions_rga[4];
|
||||
float3 directions_rgb[4];
|
||||
|
||||
compute_averages_and_directions_rgba(ptab + partition, pb, ewb, color_scalefactors, averages, directions_rgba, directions_gba, directions_rba, directions_rga, directions_rgb);
|
||||
|
||||
line4 uncorr_lines[4];
|
||||
line4 samechroma_lines[4];
|
||||
line3 separate_red_lines[4];
|
||||
line3 separate_green_lines[4];
|
||||
line3 separate_blue_lines[4];
|
||||
line3 separate_alpha_lines[4];
|
||||
|
||||
processed_line4 proc_uncorr_lines[4];
|
||||
processed_line4 proc_samechroma_lines[4];
|
||||
processed_line3 proc_separate_red_lines[4];
|
||||
processed_line3 proc_separate_green_lines[4];
|
||||
processed_line3 proc_separate_blue_lines[4];
|
||||
processed_line3 proc_separate_alpha_lines[4];
|
||||
|
||||
float uncorr_linelengths[4];
|
||||
float samechroma_linelengths[4];
|
||||
float separate_red_linelengths[4];
|
||||
float separate_green_linelengths[4];
|
||||
float separate_blue_linelengths[4];
|
||||
float separate_alpha_linelengths[4];
|
||||
|
||||
|
||||
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
uncorr_lines[j].a = averages[j];
|
||||
if (dot(directions_rgba[j], directions_rgba[j]) == 0.0f)
|
||||
uncorr_lines[j].b = normalize(float4(1, 1, 1, 1));
|
||||
else
|
||||
uncorr_lines[j].b = normalize(directions_rgba[j]);
|
||||
|
||||
proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j];
|
||||
proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j]);
|
||||
proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j]);
|
||||
|
||||
|
||||
samechroma_lines[j].a = float4(0, 0, 0, 0);
|
||||
if (dot(averages[j], averages[j]) == 0)
|
||||
samechroma_lines[j].b = normalize(float4(1, 1, 1, 1));
|
||||
else
|
||||
samechroma_lines[j].b = normalize(averages[j]);
|
||||
|
||||
proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j];
|
||||
proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j]);
|
||||
proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j]);
|
||||
|
||||
separate_red_lines[j].a = averages[j].yzw;
|
||||
if (dot(directions_gba[j], directions_gba[j]) == 0.0f)
|
||||
separate_red_lines[j].b = normalize(float3(1, 1, 1));
|
||||
else
|
||||
separate_red_lines[j].b = normalize(directions_gba[j]);
|
||||
|
||||
separate_green_lines[j].a = averages[j].xzw;
|
||||
if (dot(directions_rba[j], directions_rba[j]) == 0.0f)
|
||||
separate_green_lines[j].b = normalize(float3(1, 1, 1));
|
||||
else
|
||||
separate_green_lines[j].b = normalize(directions_rba[j]);
|
||||
|
||||
separate_blue_lines[j].a = averages[j].xyw;
|
||||
if (dot(directions_rga[j], directions_rga[j]) == 0.0f)
|
||||
separate_blue_lines[j].b = normalize(float3(1, 1, 1));
|
||||
else
|
||||
separate_blue_lines[j].b = normalize(directions_rga[j]);
|
||||
|
||||
separate_alpha_lines[j].a = averages[j].xyz;
|
||||
if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f)
|
||||
separate_alpha_lines[j].b = normalize(float3(1, 1, 1));
|
||||
else
|
||||
separate_alpha_lines[j].b = normalize(directions_rgb[j]);
|
||||
|
||||
proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * inverse_color_scalefactors[j].yzw;
|
||||
proc_separate_red_lines[j].bs = (separate_red_lines[j].b * color_scalefactors[j].yzw);
|
||||
proc_separate_red_lines[j].bis = (separate_red_lines[j].b * inverse_color_scalefactors[j].yzw);
|
||||
|
||||
proc_separate_green_lines[j].amod =
|
||||
(separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * inverse_color_scalefactors[j].xzw;
|
||||
proc_separate_green_lines[j].bs = (separate_green_lines[j].b * color_scalefactors[j].xzw);
|
||||
proc_separate_green_lines[j].bis = (separate_green_lines[j].b * inverse_color_scalefactors[j].xzw);
|
||||
|
||||
proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * inverse_color_scalefactors[j].xyw;
|
||||
proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * color_scalefactors[j].xyw);
|
||||
proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * inverse_color_scalefactors[j].xyw);
|
||||
|
||||
proc_separate_alpha_lines[j].amod =
|
||||
(separate_alpha_lines[j].a - separate_alpha_lines[j].b * dot(separate_alpha_lines[j].a, separate_alpha_lines[j].b)) * inverse_color_scalefactors[j].xyz;
|
||||
proc_separate_alpha_lines[j].bs = (separate_alpha_lines[j].b * color_scalefactors[j].xyz);
|
||||
proc_separate_alpha_lines[j].bis = (separate_alpha_lines[j].b * inverse_color_scalefactors[j].xyz);
|
||||
|
||||
}
|
||||
|
||||
float uncorr_error = compute_error_squared_rgba(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_uncorr_lines,
|
||||
uncorr_linelengths);
|
||||
float samechroma_error = compute_error_squared_rgba(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_samechroma_lines,
|
||||
samechroma_linelengths);
|
||||
|
||||
|
||||
float separate_red_error = compute_error_squared_gba(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_red_lines,
|
||||
separate_red_linelengths);
|
||||
|
||||
float separate_green_error = compute_error_squared_rba(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_green_lines,
|
||||
separate_green_linelengths);
|
||||
|
||||
float separate_blue_error = compute_error_squared_rga(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_blue_lines,
|
||||
separate_blue_linelengths);
|
||||
|
||||
float separate_alpha_error = compute_error_squared_rgb(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_alpha_lines,
|
||||
separate_alpha_linelengths);
|
||||
|
||||
// compute minimum & maximum alpha values in each partition
|
||||
float red_min[4], red_max[4];
|
||||
float green_min[4], green_max[4];
|
||||
float blue_min[4], blue_max[4];
|
||||
float alpha_min[4], alpha_max[4];
|
||||
compute_alpha_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, alpha_min, alpha_max);
|
||||
|
||||
compute_rgb_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, red_min, red_max, green_min, green_max, blue_min, blue_max);
|
||||
|
||||
/*
|
||||
Compute an estimate of error introduced by weight quantization imprecision.
|
||||
This error is computed as follows, for each partition
|
||||
1: compute the principal-axis vector (full length) in error-space
|
||||
2: convert the principal-axis vector to regular RGB-space
|
||||
3: scale the vector by a constant that estimates average quantization error
|
||||
4: for each texel, square the vector, then do a dot-product with the texel's error weight;
|
||||
sum up the results across all texels.
|
||||
4(optimized): square the vector once, then do a dot-product with the average texel error,
|
||||
then multiply by the number of texels.
|
||||
*/
|
||||
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
float tpp = (float)(ptab[partition].texels_per_partition[j]);
|
||||
|
||||
float4 ics = inverse_color_scalefactors[j];
|
||||
float4 error_weights = error_weightings[j] * (tpp * weight_imprecision_estim_squared);
|
||||
|
||||
float4 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics;
|
||||
float4 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics;
|
||||
float3 separate_red_vector = (separate_red_lines[j].b * separate_red_linelengths[j]) * ics.yzw;
|
||||
float3 separate_green_vector = (separate_green_lines[j].b * separate_green_linelengths[j]) * ics.xzw;
|
||||
float3 separate_blue_vector = (separate_blue_lines[j].b * separate_blue_linelengths[j]) * ics.xyw;
|
||||
float3 separate_alpha_vector = (separate_alpha_lines[j].b * separate_alpha_linelengths[j]) * ics.xyz;
|
||||
|
||||
uncorr_vector = uncorr_vector * uncorr_vector;
|
||||
samechroma_vector = samechroma_vector * samechroma_vector;
|
||||
separate_red_vector = separate_red_vector * separate_red_vector;
|
||||
separate_green_vector = separate_green_vector * separate_green_vector;
|
||||
separate_blue_vector = separate_blue_vector * separate_blue_vector;
|
||||
separate_alpha_vector = separate_alpha_vector * separate_alpha_vector;
|
||||
|
||||
uncorr_error += dot(uncorr_vector, error_weights);
|
||||
samechroma_error += dot(samechroma_vector, error_weights);
|
||||
separate_red_error += dot(separate_red_vector, error_weights.yzw);
|
||||
separate_green_error += dot(separate_green_vector, error_weights.xzw);
|
||||
separate_blue_error += dot(separate_blue_vector, error_weights.xyw);
|
||||
separate_alpha_error += dot(separate_alpha_vector, error_weights.xyz);
|
||||
|
||||
float red_scalar = (red_max[j] - red_min[j]);
|
||||
float green_scalar = (green_max[j] - green_min[j]);
|
||||
float blue_scalar = (blue_max[j] - blue_min[j]);
|
||||
float alpha_scalar = (alpha_max[j] - alpha_min[j]);
|
||||
red_scalar *= red_scalar;
|
||||
green_scalar *= green_scalar;
|
||||
blue_scalar *= blue_scalar;
|
||||
alpha_scalar *= alpha_scalar;
|
||||
separate_red_error += red_scalar * error_weights.x;
|
||||
separate_green_error += green_scalar * error_weights.y;
|
||||
separate_blue_error += blue_scalar * error_weights.z;
|
||||
separate_alpha_error += alpha_scalar * error_weights.w;
|
||||
}
|
||||
|
||||
uncorr_errors[i] = uncorr_error;
|
||||
samechroma_errors[i] = samechroma_error;
|
||||
separate_red_errors[i] = separate_red_error;
|
||||
separate_green_errors[i] = separate_green_error;
|
||||
separate_blue_errors[i] = separate_blue_error;
|
||||
separate_alpha_errors[i] = separate_alpha_error;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partitioning %d-%d errors: uncorr=%g, samechroma=%g, sep-alpha=%g\n", partition_count, i, uncorr_error, samechroma_error, separate_alpha_error);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partition testing without alpha, %d partitions\n", partition_count);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
|
||||
int partition = partition_sequence[i];
|
||||
|
||||
int bk_partition_count = ptab[partition].partition_count;
|
||||
if (bk_partition_count < partition_count)
|
||||
{
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partitioning %d-%d: invalid\n", partition_count, i);
|
||||
#endif
|
||||
|
||||
uncorr_errors[i] = 1e35f;
|
||||
samechroma_errors[i] = 1e35f;
|
||||
separate_red_errors[i] = 1e35f;
|
||||
separate_green_errors[i] = 1e35f;
|
||||
separate_blue_errors[i] = 1e35f;
|
||||
continue;
|
||||
}
|
||||
// the sentinel value for valid partitions above the search limit must be smaller
|
||||
// than the sentinel value for invalid partitions
|
||||
if (i >= partition_search_limit)
|
||||
{
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf(" Partitioning %d-%d: excluded from testing\n", partition_count, partition);
|
||||
#endif
|
||||
|
||||
defacto_search_limit = i;
|
||||
uncorr_errors[i] = 1e34f;
|
||||
samechroma_errors[i] = 1e34f;
|
||||
separate_red_errors[i] = 1e34f;
|
||||
separate_green_errors[i] = 1e34f;
|
||||
separate_blue_errors[i] = 1e34f;
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
// compute the weighting to give to each color channel
|
||||
// in each partition.
|
||||
float4 error_weightings[4];
|
||||
float4 color_scalefactors[4];
|
||||
float4 inverse_color_scalefactors[4];
|
||||
|
||||
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, ptab + partition, error_weightings, color_scalefactors);
|
||||
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
inverse_color_scalefactors[j].x = 1.0f / MAX(color_scalefactors[j].x, 1e-7f);
|
||||
inverse_color_scalefactors[j].y = 1.0f / MAX(color_scalefactors[j].y, 1e-7f);
|
||||
inverse_color_scalefactors[j].z = 1.0f / MAX(color_scalefactors[j].z, 1e-7f);
|
||||
inverse_color_scalefactors[j].w = 1.0f / MAX(color_scalefactors[j].w, 1e-7f);
|
||||
}
|
||||
|
||||
float3 averages[4];
|
||||
float3 directions_rgb[4];
|
||||
float2 directions_rg[4];
|
||||
float2 directions_rb[4];
|
||||
float2 directions_gb[4];
|
||||
|
||||
compute_averages_and_directions_rgb(ptab + partition, pb, ewb, color_scalefactors, averages, directions_rgb, directions_rg, directions_rb, directions_gb);
|
||||
|
||||
line3 uncorr_lines[4];
|
||||
line3 samechroma_lines[4];
|
||||
line2 separate_red_lines[4];
|
||||
line2 separate_green_lines[4];
|
||||
line2 separate_blue_lines[4];
|
||||
|
||||
processed_line3 proc_uncorr_lines[4];
|
||||
processed_line3 proc_samechroma_lines[4];
|
||||
|
||||
processed_line2 proc_separate_red_lines[4];
|
||||
processed_line2 proc_separate_green_lines[4];
|
||||
processed_line2 proc_separate_blue_lines[4];
|
||||
|
||||
float uncorr_linelengths[4];
|
||||
float samechroma_linelengths[4];
|
||||
float separate_red_linelengths[4];
|
||||
float separate_green_linelengths[4];
|
||||
float separate_blue_linelengths[4];
|
||||
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
uncorr_lines[j].a = averages[j];
|
||||
if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f)
|
||||
uncorr_lines[j].b = normalize(float3(1, 1, 1));
|
||||
else
|
||||
uncorr_lines[j].b = normalize(directions_rgb[j]);
|
||||
|
||||
|
||||
samechroma_lines[j].a = float3(0, 0, 0);
|
||||
|
||||
if (dot(averages[j], averages[j]) == 0.0f)
|
||||
samechroma_lines[j].b = normalize(float3(1, 1, 1));
|
||||
else
|
||||
samechroma_lines[j].b = normalize(averages[j]);
|
||||
|
||||
proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j].xyz;
|
||||
proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j].xyz);
|
||||
proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j].xyz);
|
||||
|
||||
proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j].xyz;
|
||||
proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j].xyz);
|
||||
proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j].xyz);
|
||||
|
||||
separate_red_lines[j].a = averages[j].yz;
|
||||
if (dot(directions_gb[j], directions_gb[j]) == 0.0f)
|
||||
separate_red_lines[j].b = normalize(float2(1, 1));
|
||||
else
|
||||
separate_red_lines[j].b = normalize(directions_gb[j]);
|
||||
|
||||
separate_green_lines[j].a = averages[j].xz;
|
||||
if (dot(directions_rb[j], directions_rb[j]) == 0.0f)
|
||||
separate_green_lines[j].b = normalize(float2(1, 1));
|
||||
else
|
||||
separate_green_lines[j].b = normalize(directions_rb[j]);
|
||||
|
||||
separate_blue_lines[j].a = averages[j].xy;
|
||||
if (dot(directions_rg[j], directions_rg[j]) == 0.0f)
|
||||
separate_blue_lines[j].b = normalize(float2(1, 1));
|
||||
else
|
||||
separate_blue_lines[j].b = normalize(directions_rg[j]);
|
||||
|
||||
proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * inverse_color_scalefactors[j].yz;
|
||||
proc_separate_red_lines[j].bs = (separate_red_lines[j].b * color_scalefactors[j].yz);
|
||||
proc_separate_red_lines[j].bis = (separate_red_lines[j].b * inverse_color_scalefactors[j].yz);
|
||||
|
||||
proc_separate_green_lines[j].amod =
|
||||
(separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * inverse_color_scalefactors[j].xz;
|
||||
proc_separate_green_lines[j].bs = (separate_green_lines[j].b * color_scalefactors[j].xz);
|
||||
proc_separate_green_lines[j].bis = (separate_green_lines[j].b * inverse_color_scalefactors[j].xz);
|
||||
|
||||
proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * inverse_color_scalefactors[j].xy;
|
||||
proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * color_scalefactors[j].xy);
|
||||
proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * inverse_color_scalefactors[j].xy);
|
||||
|
||||
}
|
||||
|
||||
float uncorr_error = compute_error_squared_rgb(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_uncorr_lines,
|
||||
uncorr_linelengths);
|
||||
float samechroma_error = compute_error_squared_rgb(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_samechroma_lines,
|
||||
samechroma_linelengths);
|
||||
|
||||
float separate_red_error = compute_error_squared_gb(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_red_lines,
|
||||
separate_red_linelengths);
|
||||
|
||||
float separate_green_error = compute_error_squared_rb(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_green_lines,
|
||||
separate_green_linelengths);
|
||||
|
||||
float separate_blue_error = compute_error_squared_rg(ptab + partition,
|
||||
pb,
|
||||
ewb,
|
||||
proc_separate_blue_lines,
|
||||
separate_blue_linelengths);
|
||||
|
||||
float red_min[4], red_max[4];
|
||||
float green_min[4], green_max[4];
|
||||
float blue_min[4], blue_max[4];
|
||||
|
||||
|
||||
compute_rgb_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, red_min, red_max, green_min, green_max, blue_min, blue_max);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
compute an estimate of error introduced by weight imprecision.
|
||||
This error is computed as follows, for each partition
|
||||
1: compute the principal-axis vector (full length) in error-space
|
||||
2: convert the principal-axis vector to regular RGB-space
|
||||
3: scale the vector by a constant that estimates average quantization error.
|
||||
4: for each texel, square the vector, then do a dot-product with the texel's error weight;
|
||||
sum up the results across all texels.
|
||||
4(optimized): square the vector once, then do a dot-product with the average texel error,
|
||||
then multiply by the number of texels.
|
||||
*/
|
||||
|
||||
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
float tpp = (float)(ptab[partition].texels_per_partition[j]);
|
||||
|
||||
float3 ics = inverse_color_scalefactors[j].xyz;
|
||||
float3 error_weights = error_weightings[j].xyz * (tpp * weight_imprecision_estim_squared);
|
||||
|
||||
float3 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics;
|
||||
float3 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics;
|
||||
|
||||
float2 separate_red_vector = (separate_red_lines[j].b * separate_red_linelengths[j]) * ics.yz;
|
||||
float2 separate_green_vector = (separate_green_lines[j].b * separate_green_linelengths[j]) * ics.xz;
|
||||
float2 separate_blue_vector = (separate_blue_lines[j].b * separate_blue_linelengths[j]) * ics.xy;
|
||||
|
||||
uncorr_vector = uncorr_vector * uncorr_vector;
|
||||
samechroma_vector = samechroma_vector * samechroma_vector;
|
||||
separate_red_vector = separate_red_vector * separate_red_vector;
|
||||
separate_green_vector = separate_green_vector * separate_green_vector;
|
||||
separate_blue_vector = separate_blue_vector * separate_blue_vector;
|
||||
|
||||
uncorr_error += dot(uncorr_vector, error_weights);
|
||||
samechroma_error += dot(samechroma_vector, error_weights);
|
||||
separate_red_error += dot(separate_red_vector, error_weights.yz);
|
||||
separate_green_error += dot(separate_green_vector, error_weights.xz);
|
||||
separate_blue_error += dot(separate_blue_vector, error_weights.xy);
|
||||
|
||||
float red_scalar = (red_max[j] - red_min[j]);
|
||||
float green_scalar = (green_max[j] - green_min[j]);
|
||||
float blue_scalar = (blue_max[j] - blue_min[j]);
|
||||
|
||||
red_scalar *= red_scalar;
|
||||
green_scalar *= green_scalar;
|
||||
blue_scalar *= blue_scalar;
|
||||
|
||||
separate_red_error += red_scalar * error_weights.x;
|
||||
separate_green_error += green_scalar * error_weights.y;
|
||||
separate_blue_error += blue_scalar * error_weights.z;
|
||||
}
|
||||
|
||||
|
||||
uncorr_errors[i] = uncorr_error;
|
||||
samechroma_errors[i] = samechroma_error;
|
||||
|
||||
separate_red_errors[i] = separate_red_error;
|
||||
separate_green_errors[i] = separate_green_error;
|
||||
separate_blue_errors[i] = separate_blue_error;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("Partitioning %d-%d errors: uncorr=%f, samechroma=%f, sep-red=%f, sep-green=%f, sep-blue=%f\n",
|
||||
partition_count, partition, uncorr_error, samechroma_error, separate_red_error, separate_green_error, separate_blue_error);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < candidates_to_return; i++)
|
||||
{
|
||||
int best_uncorr_partition = 0;
|
||||
int best_samechroma_partition = 0;
|
||||
float best_uncorr_error = 1e30f;
|
||||
float best_samechroma_error = 1e30f;
|
||||
for (j = 0; j <= defacto_search_limit; j++)
|
||||
{
|
||||
if (uncorr_errors[j] < best_uncorr_error)
|
||||
{
|
||||
best_uncorr_partition = j;
|
||||
best_uncorr_error = uncorr_errors[j];
|
||||
}
|
||||
}
|
||||
best_partitions_uncorrellated[i] = partition_sequence[best_uncorr_partition];
|
||||
uncorr_errors[best_uncorr_partition] = 1e30f;
|
||||
samechroma_errors[best_uncorr_partition] = 1e30f;
|
||||
|
||||
for (j = 0; j <= defacto_search_limit; j++)
|
||||
{
|
||||
if (samechroma_errors[j] < best_samechroma_error)
|
||||
{
|
||||
best_samechroma_partition = j;
|
||||
best_samechroma_error = samechroma_errors[j];
|
||||
}
|
||||
}
|
||||
best_partitions_samechroma[i] = partition_sequence[best_samechroma_partition];
|
||||
samechroma_errors[best_samechroma_partition] = 1e30f;
|
||||
uncorr_errors[best_samechroma_partition] = 1e30f;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * candidates_to_return; i++)
|
||||
{
|
||||
int best_partition = 0;
|
||||
float best_partition_error = 1e30f;
|
||||
|
||||
for (j = 0; j <= defacto_search_limit; j++)
|
||||
{
|
||||
if (1 || !uses_alpha)
|
||||
{
|
||||
if (separate_errors[j] < best_partition_error)
|
||||
{
|
||||
best_partition = j;
|
||||
best_partition_error = separate_errors[j];
|
||||
}
|
||||
if (separate_errors[j + PARTITION_COUNT] < best_partition_error)
|
||||
{
|
||||
best_partition = j + PARTITION_COUNT;
|
||||
best_partition_error = separate_errors[j + PARTITION_COUNT];
|
||||
}
|
||||
if (separate_errors[j + 2 * PARTITION_COUNT] < best_partition_error)
|
||||
{
|
||||
best_partition = j + 2 * PARTITION_COUNT;
|
||||
best_partition_error = separate_errors[j + 2 * PARTITION_COUNT];
|
||||
}
|
||||
}
|
||||
if (uses_alpha)
|
||||
{
|
||||
if (separate_errors[j + 3 * PARTITION_COUNT] < best_partition_error)
|
||||
{
|
||||
best_partition = j + 3 * PARTITION_COUNT;
|
||||
best_partition_error = separate_errors[j + 3 * PARTITION_COUNT];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
separate_errors[best_partition] = 1e30f;
|
||||
best_partition = ((best_partition >> PARTITION_BITS) << PARTITION_BITS) | partition_sequence[best_partition & (PARTITION_COUNT - 1)];
|
||||
best_partitions_dual_weight_planes[i] = best_partition;
|
||||
}
|
||||
|
||||
}
|
||||
2163
3rdparty/astc/astc_ideal_endpoints_and_weights.cpp
vendored
Normal file
2163
3rdparty/astc/astc_ideal_endpoints_and_weights.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
324
3rdparty/astc/astc_imageblock.cpp
vendored
Normal file
324
3rdparty/astc/astc_imageblock.cpp
vendored
Normal file
@@ -0,0 +1,324 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Functions for managing ASTC codec images.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include "softfloat.h"
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// conversion functions between the LNS representation and the FP16 representation.
|
||||
|
||||
float float_to_lns(float p)
|
||||
{
|
||||
|
||||
if (astc_isnan(p) || p <= 1.0f / 67108864.0f)
|
||||
{
|
||||
// underflow or NaN value, return 0.
|
||||
// We count underflow if the input value is smaller than 2^-26.
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (fabs(p) >= 65536.0f)
|
||||
{
|
||||
// overflow, return a +INF value
|
||||
return 65535;
|
||||
}
|
||||
|
||||
int expo;
|
||||
float normfrac = frexp(p, &expo);
|
||||
float p1;
|
||||
if (expo < -13)
|
||||
{
|
||||
// input number is smaller than 2^-14. In this case, multiply by 2^25.
|
||||
p1 = p * 33554432.0f;
|
||||
expo = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
expo += 14;
|
||||
p1 = (normfrac - 0.5f) * 4096.0f;
|
||||
}
|
||||
|
||||
if (p1 < 384.0f)
|
||||
p1 *= 4.0f / 3.0f;
|
||||
else if (p1 <= 1408.0f)
|
||||
p1 += 128.0f;
|
||||
else
|
||||
p1 = (p1 + 512.0f) * (4.0f / 5.0f);
|
||||
|
||||
p1 += expo * 2048.0f;
|
||||
return p1 + 1.0f;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
	Convert a 16-bit LNS code into a 16-bit half-float bit pattern.

	The top 5 bits of the input become the exponent field and the low 11 bits
	are passed through a three-segment piecewise-linear map to produce the
	10-bit mantissa field. The result is clamped to 0x7BFF.
*/
uint16_t lns_to_sf16(uint16_t p)
{
	const uint16_t exponent_code = p >> 11;
	const uint16_t mantissa_code = p & 0x7FF;

	// piecewise-linear expansion of the 11-bit mantissa code
	uint16_t mantissa;
	if (mantissa_code >= 1536)
		mantissa = 5 * mantissa_code - 2048;
	else if (mantissa_code >= 512)
		mantissa = 4 * mantissa_code - 512;
	else
		mantissa = 3 * mantissa_code;

	const uint16_t packed = (exponent_code << 10) | (mantissa >> 3);

	// never return anything above 0x7BFF
	return packed < 0x7BFF ? packed : 0x7BFF;
}
|
||||
|
||||
|
||||
// conversion function from 16-bit LDR value to FP16.
|
||||
// note: for LDR interpolation, it is impossible to get a denormal result;
|
||||
// this simplifies the conversion.
|
||||
// FALSE; we can receive a very small UNORM16 through the constant-block.
|
||||
uint16_t unorm16_to_sf16(uint16_t p)
|
||||
{
|
||||
if (p == 0xFFFF)
|
||||
return 0x3C00; // value of 1.0 .
|
||||
if (p < 4)
|
||||
return p << 8;
|
||||
|
||||
int lz = clz32(p) - 16;
|
||||
p <<= (lz + 1);
|
||||
p >>= 6;
|
||||
p |= (14 - lz) << 10;
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void imageblock_initialize_deriv_from_work_and_orig(imageblock * pb, int pixelcount)
|
||||
{
|
||||
int i;
|
||||
|
||||
const float *fptr = pb->orig_data;
|
||||
const float *wptr = pb->work_data;
|
||||
float *dptr = pb->deriv_data;
|
||||
|
||||
for (i = 0; i < pixelcount; i++)
|
||||
{
|
||||
|
||||
// compute derivatives for RGB first
|
||||
if (pb->rgb_lns[i])
|
||||
{
|
||||
float r = MAX(fptr[0], 6e-5f);
|
||||
float g = MAX(fptr[1], 6e-5f);
|
||||
float b = MAX(fptr[2], 6e-5f);
|
||||
|
||||
float rderiv = (float_to_lns(r * 1.05f) - float_to_lns(r)) / (r * 0.05f);
|
||||
float gderiv = (float_to_lns(g * 1.05f) - float_to_lns(g)) / (g * 0.05f);
|
||||
float bderiv = (float_to_lns(b * 1.05f) - float_to_lns(b)) / (b * 0.05f);
|
||||
|
||||
// the derivative may not actually take values smaller than 1/32 or larger than 2^25;
|
||||
// if it does, we clamp it.
|
||||
if (rderiv < (1.0f / 32.0f))
|
||||
rderiv = (1.0f / 32.0f);
|
||||
else if (rderiv > 33554432.0f)
|
||||
rderiv = 33554432.0f;
|
||||
|
||||
if (gderiv < (1.0f / 32.0f))
|
||||
gderiv = (1.0f / 32.0f);
|
||||
else if (gderiv > 33554432.0f)
|
||||
gderiv = 33554432.0f;
|
||||
|
||||
if (bderiv < (1.0f / 32.0f))
|
||||
bderiv = (1.0f / 32.0f);
|
||||
else if (bderiv > 33554432.0f)
|
||||
bderiv = 33554432.0f;
|
||||
|
||||
dptr[0] = rderiv;
|
||||
dptr[1] = gderiv;
|
||||
dptr[2] = bderiv;
|
||||
}
|
||||
else
|
||||
{
|
||||
dptr[0] = 65535.0f;
|
||||
dptr[1] = 65535.0f;
|
||||
dptr[2] = 65535.0f;
|
||||
}
|
||||
|
||||
|
||||
// then compute derivatives for Alpha
|
||||
if (pb->alpha_lns[i])
|
||||
{
|
||||
float a = MAX(fptr[3], 6e-5f);
|
||||
float aderiv = (float_to_lns(a * 1.05f) - float_to_lns(a)) / (a * 0.05f);
|
||||
// the derivative may not actually take values smaller than 1/32 or larger than 2^25;
|
||||
// if it does, we clamp it.
|
||||
if (aderiv < (1.0f / 32.0f))
|
||||
aderiv = (1.0f / 32.0f);
|
||||
else if (aderiv > 33554432.0f)
|
||||
aderiv = 33554432.0f;
|
||||
|
||||
dptr[3] = aderiv;
|
||||
}
|
||||
else
|
||||
{
|
||||
dptr[3] = 65535.0f;
|
||||
}
|
||||
|
||||
fptr += 4;
|
||||
wptr += 4;
|
||||
dptr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper function to initialize the work-data from the orig-data
|
||||
void imageblock_initialize_work_from_orig(imageblock * pb, int pixelcount)
|
||||
{
|
||||
int i;
|
||||
float *fptr = pb->orig_data;
|
||||
float *wptr = pb->work_data;
|
||||
|
||||
for (i = 0; i < pixelcount; i++)
|
||||
{
|
||||
if (pb->rgb_lns[i])
|
||||
{
|
||||
wptr[0] = float_to_lns(fptr[0]);
|
||||
wptr[1] = float_to_lns(fptr[1]);
|
||||
wptr[2] = float_to_lns(fptr[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
wptr[0] = fptr[0] * 65535.0f;
|
||||
wptr[1] = fptr[1] * 65535.0f;
|
||||
wptr[2] = fptr[2] * 65535.0f;
|
||||
}
|
||||
|
||||
if (pb->alpha_lns[i])
|
||||
{
|
||||
wptr[3] = float_to_lns(fptr[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
wptr[3] = fptr[3] * 65535.0f;
|
||||
}
|
||||
fptr += 4;
|
||||
wptr += 4;
|
||||
}
|
||||
|
||||
imageblock_initialize_deriv_from_work_and_orig(pb, pixelcount);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper function to initialize the orig-data from the work-data
|
||||
void imageblock_initialize_orig_from_work(imageblock * pb, int pixelcount)
|
||||
{
|
||||
int i;
|
||||
float *fptr = pb->orig_data;
|
||||
float *wptr = pb->work_data;
|
||||
|
||||
for (i = 0; i < pixelcount; i++)
|
||||
{
|
||||
if (pb->rgb_lns[i])
|
||||
{
|
||||
fptr[0] = sf16_to_float(lns_to_sf16((uint16_t) wptr[0]));
|
||||
fptr[1] = sf16_to_float(lns_to_sf16((uint16_t) wptr[1]));
|
||||
fptr[2] = sf16_to_float(lns_to_sf16((uint16_t) wptr[2]));
|
||||
}
|
||||
else
|
||||
{
|
||||
fptr[0] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[0]));
|
||||
fptr[1] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[1]));
|
||||
fptr[2] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[2]));
|
||||
}
|
||||
|
||||
if (pb->alpha_lns[i])
|
||||
{
|
||||
fptr[3] = sf16_to_float(lns_to_sf16((uint16_t) wptr[3]));
|
||||
}
|
||||
else
|
||||
{
|
||||
fptr[3] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[3]));
|
||||
}
|
||||
|
||||
fptr += 4;
|
||||
wptr += 4;
|
||||
}
|
||||
|
||||
imageblock_initialize_deriv_from_work_and_orig(pb, pixelcount);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
For an imageblock, update its flags.
|
||||
|
||||
The updating is done based on work_data, not orig_data.
|
||||
*/
|
||||
void update_imageblock_flags(imageblock * pb, int xdim, int ydim, int zdim)
|
||||
{
|
||||
int i;
|
||||
float red_min = 1e38f, red_max = -1e38f;
|
||||
float green_min = 1e38f, green_max = -1e38f;
|
||||
float blue_min = 1e38f, blue_max = -1e38f;
|
||||
float alpha_min = 1e38f, alpha_max = -1e38f;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
int grayscale = 1;
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float red = pb->work_data[4 * i];
|
||||
float green = pb->work_data[4 * i + 1];
|
||||
float blue = pb->work_data[4 * i + 2];
|
||||
float alpha = pb->work_data[4 * i + 3];
|
||||
if (red < red_min)
|
||||
red_min = red;
|
||||
if (red > red_max)
|
||||
red_max = red;
|
||||
if (green < green_min)
|
||||
green_min = green;
|
||||
if (green > green_max)
|
||||
green_max = green;
|
||||
if (blue < blue_min)
|
||||
blue_min = blue;
|
||||
if (blue > blue_max)
|
||||
blue_max = blue;
|
||||
if (alpha < alpha_min)
|
||||
alpha_min = alpha;
|
||||
if (alpha > alpha_max)
|
||||
alpha_max = alpha;
|
||||
|
||||
if (grayscale == 1 && (red != green || red != blue))
|
||||
grayscale = 0;
|
||||
}
|
||||
|
||||
pb->red_min = red_min;
|
||||
pb->red_max = red_max;
|
||||
pb->green_min = green_min;
|
||||
pb->green_max = green_max;
|
||||
pb->blue_min = blue_min;
|
||||
pb->blue_max = blue_max;
|
||||
pb->alpha_min = alpha_min;
|
||||
pb->alpha_max = alpha_max;
|
||||
pb->grayscale = grayscale;
|
||||
}
|
||||
|
||||
649
3rdparty/astc/astc_integer_sequence.cpp
vendored
Normal file
649
3rdparty/astc/astc_integer_sequence.cpp
vendored
Normal file
@@ -0,0 +1,649 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Functions to encode/decode data using Bounded Integer Sequence
|
||||
* Encoding.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#include "astc_codec_internals.h"
|
||||
// Decode-side lookup table: the unpacked quint triplet <low,middle,high> for
// each packed-quint value. The first index is the 7-bit packed value (128
// rows); the second selects the quint within the triplet. decode_ise() uses a
// row to supply the high parts of three consecutive elements.
|
||||
static const uint8_t quints_of_integer[128][3] = {
|
||||
{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
|
||||
{4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
|
||||
{0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0},
|
||||
{4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},
|
||||
{0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0},
|
||||
{4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
|
||||
{0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0},
|
||||
{4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},
|
||||
{0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1},
|
||||
{4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
|
||||
{0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1},
|
||||
{4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
|
||||
{0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1},
|
||||
{4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},
|
||||
{0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
|
||||
{4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},
|
||||
{0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2},
|
||||
{4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
|
||||
{0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2},
|
||||
{4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
|
||||
{0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2},
|
||||
{4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},
|
||||
{0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2},
|
||||
{4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
|
||||
{0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3},
|
||||
{4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},
|
||||
{0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3},
|
||||
{4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},
|
||||
{0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3},
|
||||
{4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
|
||||
{0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3},
|
||||
{4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4},
|
||||
};
|
||||
|
||||
// Encode-side lookup table: the packed quint-value for every unpacked
// quint-triplet. Each of the three indices is one quint (dimension 5).
|
||||
// indexed by [high][middle][low]; encode_ise() looks it up with the high
// parts of three consecutive elements, last element first.
|
||||
static const uint8_t integer_of_quints[5][5][5] = {
|
||||
{
|
||||
{0, 1, 2, 3, 4,},
|
||||
{8, 9, 10, 11, 12,},
|
||||
{16, 17, 18, 19, 20,},
|
||||
{24, 25, 26, 27, 28,},
|
||||
{5, 13, 21, 29, 6,},
|
||||
},
|
||||
{
|
||||
{32, 33, 34, 35, 36,},
|
||||
{40, 41, 42, 43, 44,},
|
||||
{48, 49, 50, 51, 52,},
|
||||
{56, 57, 58, 59, 60,},
|
||||
{37, 45, 53, 61, 14,},
|
||||
},
|
||||
{
|
||||
{64, 65, 66, 67, 68,},
|
||||
{72, 73, 74, 75, 76,},
|
||||
{80, 81, 82, 83, 84,},
|
||||
{88, 89, 90, 91, 92,},
|
||||
{69, 77, 85, 93, 22,},
|
||||
},
|
||||
{
|
||||
{96, 97, 98, 99, 100,},
|
||||
{104, 105, 106, 107, 108,},
|
||||
{112, 113, 114, 115, 116,},
|
||||
{120, 121, 122, 123, 124,},
|
||||
{101, 109, 117, 125, 30,},
|
||||
},
|
||||
{
|
||||
{102, 103, 70, 71, 38,},
|
||||
{110, 111, 78, 79, 46,},
|
||||
{118, 119, 86, 87, 54,},
|
||||
{126, 127, 94, 95, 62,},
|
||||
{39, 47, 55, 63, 31,},
|
||||
},
|
||||
};
|
||||
|
||||
// Decode-side lookup table: the unpacked trit quintuplet <low,_,_,_,high> for
// each packed-trit value. The first index is the 8-bit packed value (256
// rows); the second selects the trit within the quintuplet. decode_ise() uses
// a row to supply the high parts of five consecutive elements.
|
||||
static const uint8_t trits_of_integer[256][5] = {
|
||||
{0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
|
||||
{0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
|
||||
{0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
|
||||
{0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
|
||||
{0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
|
||||
{0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
|
||||
{0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
|
||||
{0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
|
||||
{0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
|
||||
{0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
|
||||
{0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
|
||||
{0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
|
||||
{0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
|
||||
{0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
|
||||
{0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
|
||||
{0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
|
||||
{0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
|
||||
{0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
|
||||
{0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
|
||||
{0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
|
||||
{0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
|
||||
{0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
|
||||
{0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
|
||||
{0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
|
||||
{0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
|
||||
{0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
|
||||
{0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
|
||||
{0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
|
||||
{0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
|
||||
{0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
|
||||
{0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
|
||||
{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
|
||||
{0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
|
||||
{0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
|
||||
{0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
|
||||
{0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
|
||||
{0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
|
||||
{0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
|
||||
{0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
|
||||
{0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
|
||||
{0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
|
||||
{0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
|
||||
{0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
|
||||
{0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
|
||||
{0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
|
||||
{0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
|
||||
{0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
|
||||
{0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
|
||||
{0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
|
||||
{0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
|
||||
{0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
|
||||
{0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
|
||||
{0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
|
||||
{0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
|
||||
{0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
|
||||
{0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
|
||||
{0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
|
||||
{0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
|
||||
{0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
|
||||
{0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
|
||||
{0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
|
||||
{0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
|
||||
{0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
|
||||
{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
|
||||
};
|
||||
|
||||
// Encode-side lookup table: the packed trit-value for every unpacked
// trit-quintuplet. Each of the five indices is one trit (dimension 3).
|
||||
// indexed by [high][][][][low]; encode_ise() looks it up with the high parts
// of five consecutive elements, last element first.
|
||||
static const uint8_t integer_of_trits[3][3][3][3][3] = {
|
||||
{
|
||||
{
|
||||
{
|
||||
{0, 1, 2,},
|
||||
{4, 5, 6,},
|
||||
{8, 9, 10,},
|
||||
},
|
||||
{
|
||||
{16, 17, 18,},
|
||||
{20, 21, 22,},
|
||||
{24, 25, 26,},
|
||||
},
|
||||
{
|
||||
{3, 7, 15,},
|
||||
{19, 23, 27,},
|
||||
{12, 13, 14,},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{32, 33, 34,},
|
||||
{36, 37, 38,},
|
||||
{40, 41, 42,},
|
||||
},
|
||||
{
|
||||
{48, 49, 50,},
|
||||
{52, 53, 54,},
|
||||
{56, 57, 58,},
|
||||
},
|
||||
{
|
||||
{35, 39, 47,},
|
||||
{51, 55, 59,},
|
||||
{44, 45, 46,},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{64, 65, 66,},
|
||||
{68, 69, 70,},
|
||||
{72, 73, 74,},
|
||||
},
|
||||
{
|
||||
{80, 81, 82,},
|
||||
{84, 85, 86,},
|
||||
{88, 89, 90,},
|
||||
},
|
||||
{
|
||||
{67, 71, 79,},
|
||||
{83, 87, 91,},
|
||||
{76, 77, 78,},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{
|
||||
{128, 129, 130,},
|
||||
{132, 133, 134,},
|
||||
{136, 137, 138,},
|
||||
},
|
||||
{
|
||||
{144, 145, 146,},
|
||||
{148, 149, 150,},
|
||||
{152, 153, 154,},
|
||||
},
|
||||
{
|
||||
{131, 135, 143,},
|
||||
{147, 151, 155,},
|
||||
{140, 141, 142,},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{160, 161, 162,},
|
||||
{164, 165, 166,},
|
||||
{168, 169, 170,},
|
||||
},
|
||||
{
|
||||
{176, 177, 178,},
|
||||
{180, 181, 182,},
|
||||
{184, 185, 186,},
|
||||
},
|
||||
{
|
||||
{163, 167, 175,},
|
||||
{179, 183, 187,},
|
||||
{172, 173, 174,},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{192, 193, 194,},
|
||||
{196, 197, 198,},
|
||||
{200, 201, 202,},
|
||||
},
|
||||
{
|
||||
{208, 209, 210,},
|
||||
{212, 213, 214,},
|
||||
{216, 217, 218,},
|
||||
},
|
||||
{
|
||||
{195, 199, 207,},
|
||||
{211, 215, 219,},
|
||||
{204, 205, 206,},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{
|
||||
{96, 97, 98,},
|
||||
{100, 101, 102,},
|
||||
{104, 105, 106,},
|
||||
},
|
||||
{
|
||||
{112, 113, 114,},
|
||||
{116, 117, 118,},
|
||||
{120, 121, 122,},
|
||||
},
|
||||
{
|
||||
{99, 103, 111,},
|
||||
{115, 119, 123,},
|
||||
{108, 109, 110,},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{224, 225, 226,},
|
||||
{228, 229, 230,},
|
||||
{232, 233, 234,},
|
||||
},
|
||||
{
|
||||
{240, 241, 242,},
|
||||
{244, 245, 246,},
|
||||
{248, 249, 250,},
|
||||
},
|
||||
{
|
||||
{227, 231, 239,},
|
||||
{243, 247, 251,},
|
||||
{236, 237, 238,},
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{28, 29, 30,},
|
||||
{60, 61, 62,},
|
||||
{92, 93, 94,},
|
||||
},
|
||||
{
|
||||
{156, 157, 158,},
|
||||
{188, 189, 190,},
|
||||
{220, 221, 222,},
|
||||
},
|
||||
{
|
||||
{31, 63, 127,},
|
||||
{159, 191, 255,},
|
||||
{252, 253, 254,},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
|
||||
void find_number_of_bits_trits_quints(int quantization_level, int *bits, int *trits, int *quints)
|
||||
{
|
||||
*bits = 0;
|
||||
*trits = 0;
|
||||
*quints = 0;
|
||||
switch (quantization_level)
|
||||
{
|
||||
case QUANT_2:
|
||||
*bits = 1;
|
||||
break;
|
||||
case QUANT_3:
|
||||
*bits = 0;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_4:
|
||||
*bits = 2;
|
||||
break;
|
||||
case QUANT_5:
|
||||
*bits = 0;
|
||||
*quints = 1;
|
||||
break;
|
||||
case QUANT_6:
|
||||
*bits = 1;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_8:
|
||||
*bits = 3;
|
||||
break;
|
||||
case QUANT_10:
|
||||
*bits = 1;
|
||||
*quints = 1;
|
||||
break;
|
||||
case QUANT_12:
|
||||
*bits = 2;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_16:
|
||||
*bits = 4;
|
||||
break;
|
||||
case QUANT_20:
|
||||
*bits = 2;
|
||||
*quints = 1;
|
||||
break;
|
||||
case QUANT_24:
|
||||
*bits = 3;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_32:
|
||||
*bits = 5;
|
||||
break;
|
||||
case QUANT_40:
|
||||
*bits = 3;
|
||||
*quints = 1;
|
||||
break;
|
||||
case QUANT_48:
|
||||
*bits = 4;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_64:
|
||||
*bits = 6;
|
||||
break;
|
||||
case QUANT_80:
|
||||
*bits = 4;
|
||||
*quints = 1;
|
||||
break;
|
||||
case QUANT_96:
|
||||
*bits = 5;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_128:
|
||||
*bits = 7;
|
||||
break;
|
||||
case QUANT_160:
|
||||
*bits = 5;
|
||||
*quints = 1;
|
||||
break;
|
||||
case QUANT_192:
|
||||
*bits = 6;
|
||||
*trits = 1;
|
||||
break;
|
||||
case QUANT_256:
|
||||
*bits = 8;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// routine to write up to 8 bits
//
// Stores the low 'bitcount' bits of 'value' into the byte stream at 'ptr',
// starting 'bitoffset' bits from its beginning (least significant bit first).
// Bits outside the written field are preserved.
//
// The byte following the first one is only touched when the field actually
// straddles a byte boundary, so a field that ends in the last byte of a buffer
// does not access memory past that buffer. All masking is done on unsigned
// values to avoid shifting a negative int.
static inline void write_bits(int value, int bitcount, int bitoffset, uint8_t * ptr)
{
	unsigned int field_mask = (1u << bitcount) - 1u;
	unsigned int field = static_cast<unsigned int>(value) & field_mask;

	ptr += bitoffset >> 3;
	bitoffset &= 7;

	// position the field and its mask inside a 16-bit window starting at ptr[0]
	field <<= bitoffset;
	field_mask <<= bitoffset;

	ptr[0] = static_cast<uint8_t>((ptr[0] & ~field_mask) | field);

	// spill into the next byte only if some field bits land there
	if (field_mask >> 8)
		ptr[1] = static_cast<uint8_t>((ptr[1] & ~(field_mask >> 8)) | (field >> 8));
}
|
||||
|
||||
|
||||
// routine to read up to 8 bits
//
// Returns the 'bitcount'-bit field that starts 'bitoffset' bits into the byte
// stream at 'ptr' (least significant bit first).
//
// The byte following the first one is only read when the field actually
// straddles a byte boundary, so a field that ends in the last byte of a buffer
// does not read past that buffer.
static inline int read_bits(int bitcount, int bitoffset, const uint8_t * ptr)
{
	const unsigned int field_mask = (1u << bitcount) - 1u;

	ptr += bitoffset >> 3;
	bitoffset &= 7;

	unsigned int window = ptr[0];
	if (bitoffset + bitcount > 8)
		window |= static_cast<unsigned int>(ptr[1]) << 8;

	return static_cast<int>((window >> bitoffset) & field_mask);
}
|
||||
|
||||
|
||||
|
||||
|
||||
void encode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset)
|
||||
{
|
||||
int i;
|
||||
uint8_t lowparts[64];
|
||||
uint8_t highparts[69]; // 64 elements + 5 elements for padding
|
||||
uint8_t tq_blocks[22]; // trit-blocks or quint-blocks
|
||||
|
||||
int bits, trits, quints;
|
||||
find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints);
|
||||
|
||||
for (i = 0; i < elements; i++)
|
||||
{
|
||||
lowparts[i] = input_data[i] & ((1 << bits) - 1);
|
||||
highparts[i] = input_data[i] >> bits;
|
||||
}
|
||||
for (i = elements; i < elements + 5; i++)
|
||||
highparts[i] = 0; // padding before we start constructing trit-blocks or quint-blocks
|
||||
|
||||
// construct trit-blocks or quint-blocks as necessary
|
||||
if (trits)
|
||||
{
|
||||
int trit_blocks = (elements + 4) / 5;
|
||||
for (i = 0; i < trit_blocks; i++)
|
||||
tq_blocks[i] = integer_of_trits[highparts[5 * i + 4]][highparts[5 * i + 3]][highparts[5 * i + 2]][highparts[5 * i + 1]][highparts[5 * i]];
|
||||
}
|
||||
if (quints)
|
||||
{
|
||||
int quint_blocks = (elements + 2) / 3;
|
||||
for (i = 0; i < quint_blocks; i++)
|
||||
tq_blocks[i] = integer_of_quints[highparts[3 * i + 2]][highparts[3 * i + 1]][highparts[3 * i]];
|
||||
}
|
||||
|
||||
// then, write out the actual bits.
|
||||
int lcounter = 0;
|
||||
int hcounter = 0;
|
||||
for (i = 0; i < elements; i++)
|
||||
{
|
||||
write_bits(lowparts[i], bits, bit_offset, output_data);
|
||||
bit_offset += bits;
|
||||
if (trits)
|
||||
{
|
||||
static const int bits_to_write[5] = { 2, 2, 1, 2, 1 };
|
||||
static const int block_shift[5] = { 0, 2, 4, 5, 7 };
|
||||
static const int next_lcounter[5] = { 1, 2, 3, 4, 0 };
|
||||
static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 };
|
||||
write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data);
|
||||
bit_offset += bits_to_write[lcounter];
|
||||
hcounter += hcounter_incr[lcounter];
|
||||
lcounter = next_lcounter[lcounter];
|
||||
}
|
||||
if (quints)
|
||||
{
|
||||
static const int bits_to_write[3] = { 3, 2, 2 };
|
||||
static const int block_shift[3] = { 0, 3, 5 };
|
||||
static const int next_lcounter[3] = { 1, 2, 0 };
|
||||
static const int hcounter_incr[3] = { 0, 0, 1 };
|
||||
write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data);
|
||||
bit_offset += bits_to_write[lcounter];
|
||||
hcounter += hcounter_incr[lcounter];
|
||||
lcounter = next_lcounter[lcounter];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void decode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset)
|
||||
{
|
||||
int i;
|
||||
// note: due to how the trit/quint-block unpacking is done in this function,
|
||||
// we may write more temporary results than the number of outputs
|
||||
// The maximum actual number of results is 64 bit, but we keep 4 additional elements
|
||||
// of padding.
|
||||
uint8_t results[68];
|
||||
uint8_t tq_blocks[22]; // trit-blocks or quint-blocks
|
||||
|
||||
int bits, trits, quints;
|
||||
find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints);
|
||||
|
||||
int lcounter = 0;
|
||||
int hcounter = 0;
|
||||
|
||||
// trit-blocks or quint-blocks must be zeroed out before we collect them in the loop below.
|
||||
for (i = 0; i < 22; i++)
|
||||
tq_blocks[i] = 0;
|
||||
|
||||
// collect bits for each element, as well as bits for any trit-blocks and quint-blocks.
|
||||
for (i = 0; i < elements; i++)
|
||||
{
|
||||
results[i] = read_bits(bits, bit_offset, input_data);
|
||||
bit_offset += bits;
|
||||
if (trits)
|
||||
{
|
||||
static const int bits_to_read[5] = { 2, 2, 1, 2, 1 };
|
||||
static const int block_shift[5] = { 0, 2, 4, 5, 7 };
|
||||
static const int next_lcounter[5] = { 1, 2, 3, 4, 0 };
|
||||
static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 };
|
||||
int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
|
||||
bit_offset += bits_to_read[lcounter];
|
||||
tq_blocks[hcounter] |= tdata << block_shift[lcounter];
|
||||
hcounter += hcounter_incr[lcounter];
|
||||
lcounter = next_lcounter[lcounter];
|
||||
}
|
||||
if (quints)
|
||||
{
|
||||
static const int bits_to_read[3] = { 3, 2, 2 };
|
||||
static const int block_shift[3] = { 0, 3, 5 };
|
||||
static const int next_lcounter[3] = { 1, 2, 0 };
|
||||
static const int hcounter_incr[3] = { 0, 0, 1 };
|
||||
int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
|
||||
bit_offset += bits_to_read[lcounter];
|
||||
tq_blocks[hcounter] |= tdata << block_shift[lcounter];
|
||||
hcounter += hcounter_incr[lcounter];
|
||||
lcounter = next_lcounter[lcounter];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// unpack trit-blocks or quint-blocks as needed
|
||||
if (trits)
|
||||
{
|
||||
int trit_blocks = (elements + 4) / 5;
|
||||
for (i = 0; i < trit_blocks; i++)
|
||||
{
|
||||
const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
|
||||
results[5 * i] |= tritptr[0] << bits;
|
||||
results[5 * i + 1] |= tritptr[1] << bits;
|
||||
results[5 * i + 2] |= tritptr[2] << bits;
|
||||
results[5 * i + 3] |= tritptr[3] << bits;
|
||||
results[5 * i + 4] |= tritptr[4] << bits;
|
||||
}
|
||||
}
|
||||
|
||||
if (quints)
|
||||
{
|
||||
int quint_blocks = (elements + 2) / 3;
|
||||
for (i = 0; i < quint_blocks; i++)
|
||||
{
|
||||
const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];
|
||||
results[3 * i] |= quintptr[0] << bits;
|
||||
results[3 * i + 1] |= quintptr[1] << bits;
|
||||
results[3 * i + 2] |= quintptr[2] << bits;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < elements; i++)
|
||||
output_data[i] = results[i];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int compute_ise_bitcount(int items, quantization_method quant)
|
||||
{
|
||||
switch (quant)
|
||||
{
|
||||
case QUANT_2:
|
||||
return items;
|
||||
case QUANT_3:
|
||||
return (8 * items + 4) / 5;
|
||||
case QUANT_4:
|
||||
return 2 * items;
|
||||
case QUANT_5:
|
||||
return (7 * items + 2) / 3;
|
||||
case QUANT_6:
|
||||
return (13 * items + 4) / 5;
|
||||
case QUANT_8:
|
||||
return 3 * items;
|
||||
case QUANT_10:
|
||||
return (10 * items + 2) / 3;
|
||||
case QUANT_12:
|
||||
return (18 * items + 4) / 5;
|
||||
case QUANT_16:
|
||||
return items * 4;
|
||||
case QUANT_20:
|
||||
return (13 * items + 2) / 3;
|
||||
case QUANT_24:
|
||||
return (23 * items + 4) / 5;
|
||||
case QUANT_32:
|
||||
return 5 * items;
|
||||
case QUANT_40:
|
||||
return (16 * items + 2) / 3;
|
||||
case QUANT_48:
|
||||
return (28 * items + 4) / 5;
|
||||
case QUANT_64:
|
||||
return 6 * items;
|
||||
case QUANT_80:
|
||||
return (19 * items + 2) / 3;
|
||||
case QUANT_96:
|
||||
return (33 * items + 4) / 5;
|
||||
case QUANT_128:
|
||||
return 7 * items;
|
||||
case QUANT_160:
|
||||
return (22 * items + 2) / 3;
|
||||
case QUANT_192:
|
||||
return (38 * items + 4) / 5;
|
||||
case QUANT_256:
|
||||
return 8 * items;
|
||||
default:
|
||||
return 100000;
|
||||
}
|
||||
}
|
||||
520
3rdparty/astc/astc_kmeans_partitioning.cpp
vendored
Normal file
520
3rdparty/astc/astc_kmeans_partitioning.cpp
vendored
Normal file
@@ -0,0 +1,520 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief approximate k-means cluster partitioning. Do this in 2 stages
|
||||
*
|
||||
* 1: basic clustering, a couple of passes just to get a few clusters
|
||||
* 2: clustering based on line, a few passes until it seems to
|
||||
* stabilize.
|
||||
*
|
||||
* After clustering is done, we use the clustering result to construct
|
||||
 * one bitmap for each partition. We then scan through the partition table,
|
||||
* counting how well the bitmaps matched.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
// for k++ means, we need pseudo-random numbers, however using random numbers directly
|
||||
// results in irreproducible encoding results. As such, we will instead
|
||||
// just supply a handful of numbers from random.org, and apply an algorithm similar
|
||||
// to XKCD #221. (http://xkcd.com/221/)
|
||||
// cluster the texels using the k++ means clustering initialization algorithm.
|
||||
|
||||
void kpp_initialize(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, float4 * cluster_centers)
|
||||
{
|
||||
int i;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
int cluster_center_samples[4];
|
||||
// pick a random sample as first center-point.
|
||||
cluster_center_samples[0] = 145897 /* number from random.org */ % texels_per_block;
|
||||
int samples_selected = 1;
|
||||
|
||||
float distances[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
// compute the distance to the first point.
|
||||
int sample = cluster_center_samples[0];
|
||||
float4 center_color = float4(blk->work_data[4 * sample],
|
||||
blk->work_data[4 * sample + 1],
|
||||
blk->work_data[4 * sample + 2],
|
||||
blk->work_data[4 * sample + 3]);
|
||||
|
||||
float distance_sum = 0.0f;
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float4 color = float4(blk->work_data[4 * i],
|
||||
blk->work_data[4 * i + 1],
|
||||
blk->work_data[4 * i + 2],
|
||||
blk->work_data[4 * i + 3]);
|
||||
float4 diff = color - center_color;
|
||||
float distance = dot(diff, diff);
|
||||
distance_sum += distance;
|
||||
distances[i] = distance;
|
||||
}
|
||||
|
||||
// more numbers from random.org
|
||||
float cluster_cutoffs[25] = {
|
||||
0.952312f, 0.206893f, 0.835984f, 0.507813f, 0.466170f,
|
||||
0.872331f, 0.488028f, 0.866394f, 0.363093f, 0.467905f,
|
||||
0.812967f, 0.626220f, 0.932770f, 0.275454f, 0.832020f,
|
||||
0.362217f, 0.318558f, 0.240113f, 0.009190f, 0.983995f,
|
||||
0.566812f, 0.347661f, 0.731960f, 0.156391f, 0.297786f
|
||||
};
|
||||
|
||||
while (1)
|
||||
{
|
||||
// pick a point in a weighted-random fashion.
|
||||
float summa = 0.0f;
|
||||
float distance_cutoff = distance_sum * cluster_cutoffs[samples_selected + 5 * partition_count];
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
summa += distances[i];
|
||||
if (summa >= distance_cutoff)
|
||||
break;
|
||||
}
|
||||
sample = i;
|
||||
if (sample >= texels_per_block)
|
||||
sample = texels_per_block - 1;
|
||||
|
||||
|
||||
cluster_center_samples[samples_selected] = sample;
|
||||
samples_selected++;
|
||||
if (samples_selected >= partition_count)
|
||||
break;
|
||||
|
||||
// update the distances with the new point.
|
||||
center_color = float4(blk->work_data[4 * sample], blk->work_data[4 * sample + 1], blk->work_data[4 * sample + 2], blk->work_data[4 * sample + 3]);
|
||||
|
||||
distance_sum = 0.0f;
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float4 color = float4(blk->work_data[4 * i],
|
||||
blk->work_data[4 * i + 1],
|
||||
blk->work_data[4 * i + 2],
|
||||
blk->work_data[4 * i + 3]);
|
||||
float4 diff = color - center_color;
|
||||
float distance = dot(diff, diff);
|
||||
distance = MIN(distance, distances[i]);
|
||||
distance_sum += distance;
|
||||
distances[i] = distance;
|
||||
}
|
||||
}
|
||||
|
||||
// finally, gather up the results.
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
int sample = cluster_center_samples[i];
|
||||
float4 color = float4(blk->work_data[4 * sample],
|
||||
blk->work_data[4 * sample + 1],
|
||||
blk->work_data[4 * sample + 2],
|
||||
blk->work_data[4 * sample + 3]);
|
||||
cluster_centers[i] = color;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// basic K-means clustering: given a set of cluster centers,
|
||||
// assign each texel to a partition
|
||||
void basic_kmeans_assign_pass(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, const float4 * cluster_centers, int *partition_of_texel)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
float distances[MAX_TEXELS_PER_BLOCK];
|
||||
float4 center_color = cluster_centers[0];
|
||||
|
||||
int texels_per_partition[4];
|
||||
|
||||
texels_per_partition[0] = texels_per_block;
|
||||
for (i = 1; i < partition_count; i++)
|
||||
texels_per_partition[i] = 0;
|
||||
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float4 color = float4(blk->work_data[4 * i],
|
||||
blk->work_data[4 * i + 1],
|
||||
blk->work_data[4 * i + 2],
|
||||
blk->work_data[4 * i + 3]);
|
||||
float4 diff = color - center_color;
|
||||
float distance = dot(diff, diff);
|
||||
distances[i] = distance;
|
||||
partition_of_texel[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
for (j = 1; j < partition_count; j++)
|
||||
{
|
||||
float4 center_color = cluster_centers[j];
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float4 color = float4(blk->work_data[4 * i],
|
||||
blk->work_data[4 * i + 1],
|
||||
blk->work_data[4 * i + 2],
|
||||
blk->work_data[4 * i + 3]);
|
||||
float4 diff = color - center_color;
|
||||
float distance = dot(diff, diff);
|
||||
if (distance < distances[i])
|
||||
{
|
||||
distances[i] = distance;
|
||||
texels_per_partition[partition_of_texel[i]]--;
|
||||
texels_per_partition[j]++;
|
||||
partition_of_texel[i] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// it is possible to get a situation where one of the partitions ends up
|
||||
// without any texels. In this case, we assign texel N to partition N;
|
||||
// this is silly, but ensures that every partition retains at least one texel.
|
||||
// Reassigning a texel in this manner may cause another partition to go empty,
|
||||
// so if we actually did a reassignment, we run the whole loop over again.
|
||||
int problem_case;
|
||||
do
|
||||
{
|
||||
problem_case = 0;
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
if (texels_per_partition[i] == 0)
|
||||
{
|
||||
texels_per_partition[partition_of_texel[i]]--;
|
||||
texels_per_partition[i]++;
|
||||
partition_of_texel[i] = i;
|
||||
problem_case = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (problem_case != 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// basic k-means clustering: given a set of cluster assignments
|
||||
// for the texels, find the center position of each cluster.
|
||||
void basic_kmeans_update(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, const int *partition_of_texel, float4 * cluster_centers)
|
||||
{
|
||||
int i;
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
float4 color_sum[4];
|
||||
int weight_sum[4];
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
color_sum[i] = float4(0, 0, 0, 0);
|
||||
weight_sum[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
// first, find the center-of-gravity in each cluster
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
float4 color = float4(blk->work_data[4 * i],
|
||||
blk->work_data[4 * i + 1],
|
||||
blk->work_data[4 * i + 2],
|
||||
blk->work_data[4 * i + 3]);
|
||||
int part = partition_of_texel[i];
|
||||
color_sum[part] = color_sum[part] + color;
|
||||
weight_sum[part]++;
|
||||
}
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
cluster_centers[i] = color_sum[i] * (1.0f / weight_sum[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// after a few rounds of k-means-clustering, we should have a set of 2, 3 or 4 partitions;
|
||||
// we then turn this set into 2, 3 or 4 bitmaps. Then, for each of the 1024 partitions,
|
||||
// we try to match the bitmaps as well as possible.
|
||||
|
||||
|
||||
|
||||
|
||||
// Population count: number of set bits in a 64-bit word.
//
// Uses the classic parallel reduction (pairs -> nibbles -> bytes, then a
// multiply to add the bytes together). On targets with pointers of 4 bytes
// or less the word is processed as two 32-bit halves so that no 64-bit
// multiply is needed; both paths return the same value.
static inline int bitcount(uint64_t p)
{
	if (sizeof(void *) <= 4)
	{
		const uint32_t pair_mask = 0x55555555U;
		const uint32_t quad_mask = 0x33333333U;
		const uint32_t byte_mask = 0x0F0F0F0FU;

		uint32_t halves[2] = { (uint32_t) p, (uint32_t) (p >> 32) };
		for (int k = 0; k < 2; k++)
		{
			uint32_t v = halves[k];
			v -= (v >> 1) & pair_mask;							// 2-bit sums
			v = (v & quad_mask) + ((v >> 2) & quad_mask);		// 4-bit sums
			v = (v + (v >> 4)) & byte_mask;						// 8-bit sums
			halves[k] = v;
		}

		// each byte holds at most 16 here, so the byte-wise add cannot carry
		const uint32_t per_byte = halves[0] + halves[1];
		return (int) ((per_byte * 0x01010101U) >> 24);
	}

	const uint64_t pair_mask = 0x5555555555555555ULL;
	const uint64_t quad_mask = 0x3333333333333333ULL;
	const uint64_t byte_mask = 0x0F0F0F0F0F0F0F0FULL;

	p -= (p >> 1) & pair_mask;							// 2-bit sums
	p = (p & quad_mask) + ((p >> 2) & quad_mask);		// 4-bit sums
	p = (p + (p >> 4)) & byte_mask;						// 8-bit sums

	// the multiply accumulates all eight byte sums into the top byte
	return (int) ((p * 0x0101010101010101ULL) >> 56);
}
|
||||
|
||||
|
||||
// compute the bit-mismatch for a partitioning in 2-partition mode
|
||||
static inline int partition_mismatch2(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
|
||||
{
|
||||
int v1 = bitcount(a0 ^ b0) + bitcount(a1 ^ b1);
|
||||
int v2 = bitcount(a0 ^ b1) + bitcount(a1 ^ b0);
|
||||
return MIN(v1, v2);
|
||||
}
|
||||
|
||||
|
||||
// compute the bit-mismatch for a partitioning in 3-partition mode
|
||||
static inline int partition_mismatch3(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t b0, uint64_t b1, uint64_t b2)
|
||||
{
|
||||
int p00 = bitcount(a0 ^ b0);
|
||||
int p01 = bitcount(a0 ^ b1);
|
||||
int p02 = bitcount(a0 ^ b2);
|
||||
|
||||
int p10 = bitcount(a1 ^ b0);
|
||||
int p11 = bitcount(a1 ^ b1);
|
||||
int p12 = bitcount(a1 ^ b2);
|
||||
|
||||
int p20 = bitcount(a2 ^ b0);
|
||||
int p21 = bitcount(a2 ^ b1);
|
||||
int p22 = bitcount(a2 ^ b2);
|
||||
|
||||
int s0 = p11 + p22;
|
||||
int s1 = p12 + p21;
|
||||
int v0 = MIN(s0, s1) + p00;
|
||||
|
||||
int s2 = p10 + p22;
|
||||
int s3 = p12 + p20;
|
||||
int v1 = MIN(s2, s3) + p01;
|
||||
|
||||
int s4 = p10 + p21;
|
||||
int s5 = p11 + p20;
|
||||
int v2 = MIN(s4, s5) + p02;
|
||||
|
||||
if (v1 < v0)
|
||||
v0 = v1;
|
||||
if (v2 < v0)
|
||||
v0 = v2;
|
||||
|
||||
// 9 add, 5 MIN
|
||||
|
||||
return v0;
|
||||
}
|
||||
|
||||
// Smallest of three integers.
static inline int MIN3(int a, int b, int c)
{
	const int lowest_ab = (a < b) ? a : b;
	return (c < lowest_ab) ? c : lowest_ab;
}
|
||||
|
||||
// compute the bit-mismatch for a partitioning in 4-partition mode
|
||||
static inline int partition_mismatch4(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
|
||||
{
|
||||
int p00 = bitcount(a0 ^ b0);
|
||||
int p01 = bitcount(a0 ^ b1);
|
||||
int p02 = bitcount(a0 ^ b2);
|
||||
int p03 = bitcount(a0 ^ b3);
|
||||
|
||||
int p10 = bitcount(a1 ^ b0);
|
||||
int p11 = bitcount(a1 ^ b1);
|
||||
int p12 = bitcount(a1 ^ b2);
|
||||
int p13 = bitcount(a1 ^ b3);
|
||||
|
||||
int p20 = bitcount(a2 ^ b0);
|
||||
int p21 = bitcount(a2 ^ b1);
|
||||
int p22 = bitcount(a2 ^ b2);
|
||||
int p23 = bitcount(a2 ^ b3);
|
||||
|
||||
int p30 = bitcount(a3 ^ b0);
|
||||
int p31 = bitcount(a3 ^ b1);
|
||||
int p32 = bitcount(a3 ^ b2);
|
||||
int p33 = bitcount(a3 ^ b3);
|
||||
|
||||
int mx23 = MIN(p22 + p33, p23 + p32);
|
||||
int mx13 = MIN(p21 + p33, p23 + p31);
|
||||
int mx12 = MIN(p21 + p32, p22 + p31);
|
||||
int mx03 = MIN(p20 + p33, p23 + p30);
|
||||
int mx02 = MIN(p20 + p32, p22 + p30);
|
||||
int mx01 = MIN(p21 + p30, p20 + p31);
|
||||
|
||||
int v0 = p00 + MIN3(p11 + mx23, p12 + mx13, p13 + mx12);
|
||||
int v1 = p01 + MIN3(p10 + mx23, p12 + mx03, p13 + mx02);
|
||||
int v2 = p02 + MIN3(p11 + mx03, p10 + mx13, p13 + mx01);
|
||||
int v3 = p03 + MIN3(p11 + mx02, p12 + mx01, p10 + mx12);
|
||||
|
||||
int x0 = MIN(v0, v1);
|
||||
int x1 = MIN(v2, v3);
|
||||
return MIN(x0, x1);
|
||||
|
||||
// 16 bitcount, 17 MIN, 28 ADD
|
||||
}
|
||||
|
||||
|
||||
|
||||
void count_partition_mismatch_bits(int xdim, int ydim, int zdim, int partition_count, const uint64_t bitmaps[4], int bitcounts[PARTITION_COUNT])
|
||||
{
|
||||
int i;
|
||||
const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
|
||||
|
||||
if (partition_count == 2)
|
||||
{
|
||||
uint64_t bm0 = bitmaps[0];
|
||||
uint64_t bm1 = bitmaps[1];
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
if (pi->partition_count == 2)
|
||||
{
|
||||
bitcounts[i] = partition_mismatch2(bm0, bm1, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1]);
|
||||
}
|
||||
else
|
||||
bitcounts[i] = 255;
|
||||
pi++;
|
||||
}
|
||||
}
|
||||
else if (partition_count == 3)
|
||||
{
|
||||
uint64_t bm0 = bitmaps[0];
|
||||
uint64_t bm1 = bitmaps[1];
|
||||
uint64_t bm2 = bitmaps[2];
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
if (pi->partition_count == 3)
|
||||
{
|
||||
bitcounts[i] = partition_mismatch3(bm0, bm1, bm2, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2]);
|
||||
}
|
||||
else
|
||||
bitcounts[i] = 255;
|
||||
pi++;
|
||||
}
|
||||
}
|
||||
else if (partition_count == 4)
|
||||
{
|
||||
uint64_t bm0 = bitmaps[0];
|
||||
uint64_t bm1 = bitmaps[1];
|
||||
uint64_t bm2 = bitmaps[2];
|
||||
uint64_t bm3 = bitmaps[3];
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
if (pi->partition_count == 4)
|
||||
{
|
||||
bitcounts[i] = partition_mismatch4(bm0, bm1, bm2, bm3, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2], pi->coverage_bitmaps[3]);
|
||||
}
|
||||
else
|
||||
bitcounts[i] = 255;
|
||||
pi++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// counting-sort on the mismatch-bits, thereby
|
||||
// sorting the partitions into an ordering.
|
||||
|
||||
void get_partition_ordering_by_mismatch_bits(const int mismatch_bits[PARTITION_COUNT], int partition_ordering[PARTITION_COUNT])
|
||||
{
|
||||
int i;
|
||||
|
||||
int mscount[256];
|
||||
for (i = 0; i < 256; i++)
|
||||
mscount[i] = 0;
|
||||
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
mscount[mismatch_bits[i]]++;
|
||||
|
||||
int summa = 0;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int cnt = mscount[i];
|
||||
mscount[i] = summa;
|
||||
summa += cnt;
|
||||
}
|
||||
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
int idx = mscount[mismatch_bits[i]]++;
|
||||
partition_ordering[idx] = i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void kmeans_compute_partition_ordering(int xdim, int ydim, int zdim, int partition_count, const imageblock * blk, int *ordering)
|
||||
{
|
||||
int i;
|
||||
|
||||
const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
|
||||
|
||||
float4 cluster_centers[4];
|
||||
int partition_of_texel[MAX_TEXELS_PER_BLOCK];
|
||||
|
||||
// 3 passes of plain k-means partitioning
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
if (i == 0)
|
||||
kpp_initialize(xdim, ydim, zdim, partition_count, blk, cluster_centers);
|
||||
else
|
||||
basic_kmeans_update(xdim, ydim, zdim, partition_count, blk, partition_of_texel, cluster_centers);
|
||||
|
||||
basic_kmeans_assign_pass(xdim, ydim, zdim, partition_count, blk, cluster_centers, partition_of_texel);
|
||||
}
|
||||
|
||||
// at this point, we have a near-ideal partitioning.
|
||||
|
||||
// construct bitmaps
|
||||
uint64_t bitmaps[4];
|
||||
for (i = 0; i < 4; i++)
|
||||
bitmaps[i] = 0ULL;
|
||||
|
||||
int texels_to_process = bsd->texelcount_for_bitmap_partitioning;
|
||||
for (i = 0; i < texels_to_process; i++)
|
||||
{
|
||||
int idx = bsd->texels_for_bitmap_partitioning[i];
|
||||
bitmaps[partition_of_texel[idx]] |= 1ULL << i;
|
||||
}
|
||||
|
||||
int bitcounts[PARTITION_COUNT];
|
||||
// for each entry in the partition table, count bits of partition-mismatch.
|
||||
count_partition_mismatch_bits(xdim, ydim, zdim, partition_count, bitmaps, bitcounts);
|
||||
|
||||
// finally, sort the partitions by bits-of-partition-mismatch
|
||||
get_partition_ordering_by_mismatch_bits(bitcounts, ordering);
|
||||
|
||||
}
|
||||
681
3rdparty/astc/astc_lib.cpp
vendored
Normal file
681
3rdparty/astc/astc_lib.cpp
vendored
Normal file
@@ -0,0 +1,681 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @author Andrew Willmott
|
||||
*
|
||||
* @brief Library api for astc codec, to be used as an alternative to astc_toplevel.cpp
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#include "astc_lib.h"
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Storage for the codec-wide switches that astc_codec_internals.h declares.
// The library front end defines them here and starts with every one of
// them switched off.
int print_tile_errors      = 0;
int rgb_force_use_of_hdr   = 0;
int alpha_force_use_of_hdr = 0;
int perform_srgb_transform = 0;

#ifdef DEBUG_PRINT_DIAGNOSTICS
// Only present in diagnostic builds.
int diagnostics_tile  = -1;
int print_diagnostics = 0;
#endif
|
||||
|
||||
// Error hook that the ASTC codec sources expect the host program to define.
// This implementation only reports the source location on stderr and then
// returns to the caller; it does not terminate the process.
void astc_codec_internal_error(const char* filename, int line)
{
    FILE* const sink = stderr;
    fprintf(sink, "ASTC encode error @ %s:%d\n", filename, line);
}
|
||||
|
||||
// @todo add HDR variants
|
||||
|
||||
namespace
|
||||
{
|
||||
static bool s_tables_initialised = false;
|
||||
|
||||
inline void init_tables()
|
||||
{
|
||||
if (!s_tables_initialised)
|
||||
{
|
||||
prepare_angular_tables();
|
||||
build_quantization_mode_table();
|
||||
|
||||
s_tables_initialised = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Source-channel selectors, one row per supported channel layout. The table
// is indexed with the caller's channel-order value, so the rows must stay in
// that order. Each row lists which source byte feeds R, G, B and A.
const swizzlepattern k_swizzles[] =
{
    // ASTC_RGBA: bytes are already in R, G, B, A order
    { 0, 1, 2, 3 },
    // ASTC_BGRA: first and third byte trade places
    { 2, 1, 0, 3 },
};
|
||||
|
||||
// Allocate the scratch storage handed to compress_symbolic_block(). A single
// compress_fixed_partition_buffers object is created and shared by both
// `planes2` and `plane1`. Release everything with free_temp_buffers().
void alloc_temp_buffers(compress_symbolic_block_buffers* temp_buffers)
{
    const int decimation_weights = 2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK;
    const int mode_weights       = 2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK;

    // per-partition working set
    compress_fixed_partition_buffers* planes = new compress_fixed_partition_buffers;

    planes->ei1  = new endpoints_and_weights;
    planes->ei2  = new endpoints_and_weights;
    planes->eix1 = new endpoints_and_weights[MAX_DECIMATION_MODES];
    planes->eix2 = new endpoints_and_weights[MAX_DECIMATION_MODES];

    planes->decimated_quantized_weights               = new float[decimation_weights];
    planes->decimated_weights                         = new float[decimation_weights];
    planes->flt_quantized_decimated_quantized_weights = new float[mode_weights];
    planes->u8_quantized_decimated_quantized_weights  = new uint8_t[mode_weights];

    // block-level working set
    temp_buffers->ewb        = new error_weight_block;
    temp_buffers->ewbo       = new error_weight_block_orig;
    temp_buffers->tempblocks = new symbolic_compressed_block[4];
    temp_buffers->temp       = new imageblock;

    // the one- and two-plane paths use the same storage
    temp_buffers->planes2 = planes;
    temp_buffers->plane1  = planes;
}
|
||||
|
||||
// Release everything alloc_temp_buffers() created. `plane1` aliases
// `planes2`, so the shared object is freed once, through `planes2`.
void free_temp_buffers(compress_symbolic_block_buffers* temp_buffers)
{
    compress_fixed_partition_buffers* planes = temp_buffers->planes2;

    // contents of the per-partition working set first ...
    delete[] planes->decimated_quantized_weights;
    delete[] planes->decimated_weights;
    delete[] planes->flt_quantized_decimated_quantized_weights;
    delete[] planes->u8_quantized_decimated_quantized_weights;
    delete[] planes->eix1;
    delete[] planes->eix2;
    delete planes->ei1;
    delete planes->ei2;

    // ... then the object that owned them
    delete planes;

    // block-level working set
    delete[] temp_buffers->tempblocks;
    delete temp_buffers->temp;
    delete temp_buffers->ewbo;
    delete temp_buffers->ewb;
}
|
||||
|
||||
|
||||
// More direct version of the astc_codec_image routine, which operates on a
|
||||
// more conventional 2D image layout. Doesn't support padding, so
|
||||
// mean_stdev_radius and alpha_radius etc. must be zero.
|
||||
void to_imageblock
|
||||
(
|
||||
imageblock* pb,
|
||||
const uint8_t* src_data,
|
||||
int src_stride,
|
||||
int xpos,
|
||||
int ypos,
|
||||
int xsize,
|
||||
int ysize,
|
||||
int xdim,
|
||||
int ydim,
|
||||
swizzlepattern swz,
|
||||
bool srgb
|
||||
)
|
||||
{
|
||||
float* fptr = pb->orig_data;
|
||||
|
||||
pb->xpos = xpos;
|
||||
pb->ypos = ypos;
|
||||
pb->zpos = 0;
|
||||
|
||||
float data[6];
|
||||
data[4] = 0;
|
||||
data[5] = 1;
|
||||
|
||||
for (int y = 0; y < ydim; y++)
|
||||
{
|
||||
for (int x = 0; x < xdim; x++)
|
||||
{
|
||||
int xi = xpos + x;
|
||||
int yi = ypos + y;
|
||||
|
||||
if (xi >= xsize)
|
||||
xi = xsize - 1;
|
||||
if (yi >= ysize)
|
||||
yi = ysize - 1;
|
||||
|
||||
int offset = src_stride * yi + 4 * xi;
|
||||
|
||||
int r = src_data[offset + 0];
|
||||
int g = src_data[offset + 1];
|
||||
int b = src_data[offset + 2];
|
||||
int a = src_data[offset + 3];
|
||||
|
||||
data[0] = r / 255.0f;
|
||||
data[1] = g / 255.0f;
|
||||
data[2] = b / 255.0f;
|
||||
data[3] = a / 255.0f;
|
||||
|
||||
fptr[0] = data[swz.r];
|
||||
fptr[1] = data[swz.g];
|
||||
fptr[2] = data[swz.b];
|
||||
fptr[3] = data[swz.a];
|
||||
|
||||
fptr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// perform sRGB-to-linear transform on input data, if requested.
|
||||
int pixelcount = xdim * ydim;
|
||||
|
||||
if (srgb)
|
||||
{
|
||||
fptr = pb->orig_data;
|
||||
|
||||
for (int i = 0; i < pixelcount; i++)
|
||||
{
|
||||
float r = fptr[0];
|
||||
float g = fptr[1];
|
||||
float b = fptr[2];
|
||||
|
||||
if (r <= 0.04045f)
|
||||
r = r * (1.0f / 12.92f);
|
||||
else if (r <= 1)
|
||||
r = pow((r + 0.055f) * (1.0f / 1.055f), 2.4f);
|
||||
|
||||
if (g <= 0.04045f)
|
||||
g = g * (1.0f / 12.92f);
|
||||
else if (g <= 1)
|
||||
g = pow((g + 0.055f) * (1.0f / 1.055f), 2.4f);
|
||||
|
||||
if (b <= 0.04045f)
|
||||
b = b * (1.0f / 12.92f);
|
||||
else if (b <= 1)
|
||||
b = pow((b + 0.055f) * (1.0f / 1.055f), 2.4f);
|
||||
|
||||
fptr[0] = r;
|
||||
fptr[1] = g;
|
||||
fptr[2] = b;
|
||||
|
||||
fptr += 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < pixelcount; i++)
|
||||
{
|
||||
pb->rgb_lns [i] = 0;
|
||||
pb->alpha_lns[i] = 0;
|
||||
pb->nan_texel[i] = 0;
|
||||
}
|
||||
|
||||
imageblock_initialize_work_from_orig(pb, pixelcount);
|
||||
|
||||
update_imageblock_flags(pb, xdim, ydim, 1);
|
||||
}
|
||||
|
||||
void encode_astc
|
||||
(
|
||||
const uint8_t* src,
|
||||
int src_stride,
|
||||
swizzlepattern src_swz,
|
||||
int xsize,
|
||||
int ysize,
|
||||
int xdim,
|
||||
int ydim,
|
||||
const error_weighting_params* ewp,
|
||||
astc_decode_mode decode_mode,
|
||||
uint8_t* dst
|
||||
)
|
||||
{
|
||||
int xblocks = (xsize + xdim - 1) / xdim;
|
||||
int yblocks = (ysize + ydim - 1) / ydim;
|
||||
|
||||
get_block_size_descriptor(xdim, ydim, 1);
|
||||
get_partition_table(xdim, ydim, 1, 0);
|
||||
|
||||
imageblock pb;
|
||||
|
||||
compress_symbolic_block_buffers temp_buffers;
|
||||
alloc_temp_buffers(&temp_buffers);
|
||||
|
||||
astc_codec_image image_info = { nullptr, nullptr, xsize, ysize, 1, 0 };
|
||||
|
||||
for (int y = 0; y < yblocks; y++)
|
||||
for (int x = 0; x < xblocks; x++)
|
||||
{
|
||||
to_imageblock(&pb, src, src_stride, x * xdim, y * ydim, xsize, ysize, xdim, ydim, src_swz, decode_mode == DECODE_LDR_SRGB);
|
||||
|
||||
symbolic_compressed_block scb;
|
||||
compress_symbolic_block(&image_info, decode_mode, xdim, ydim, 1, ewp, &pb, &scb, &temp_buffers);
|
||||
|
||||
physical_compressed_block pcb = symbolic_to_physical(xdim, ydim, 1, &scb);
|
||||
|
||||
uint8_t* dst_block = dst + (y * xblocks + x) * 16;
|
||||
|
||||
*(physical_compressed_block*) dst_block = pcb;
|
||||
}
|
||||
|
||||
free_temp_buffers(&temp_buffers);
|
||||
}
|
||||
|
||||
// Reset the error-weighting parameters to neutral values: unit powers and
// base weights, equal weight on all four channels, and every optional term
// (mean/stdev weighting, radii, alpha scaling, block-artifact suppression,
// normal-map angular scale) switched off. Fields not listed here are left
// untouched.
void init_ewp(error_weighting_params& ewp)
{
    // unit response for colour and alpha
    ewp.rgb_power         = 1.0f;
    ewp.rgb_base_weight   = 1.0f;
    ewp.alpha_power       = 1.0f;
    ewp.alpha_base_weight = 1.0f;

    // all channels count equally
    for (int channel = 0; channel < 4; channel++)
        ewp.rgba_weights[channel] = 1.0f;

    // no neighbourhood statistics
    ewp.rgb_mean_weight           = 0.0f;
    ewp.rgb_stdev_weight          = 0.0f;
    ewp.alpha_mean_weight         = 0.0f;
    ewp.alpha_stdev_weight        = 0.0f;
    ewp.rgb_mean_and_stdev_mixing = 0.0f;
    ewp.mean_stdev_radius         = 0;
    ewp.alpha_radius              = 0;

    // no special-case behaviour
    ewp.enable_rgb_scale_with_alpha = 0;
    ewp.block_artifact_suppression  = 0.0f;
    ewp.ra_normal_angular_scale     = 0;
}
|
||||
|
||||
// Fill in the search-effort fields of `ewp` for one of the encoder quality
// presets and a 2D block of xdim * ydim texels.
//
// Sets max_refinement_iters, block_mode_cutoff, partition_1_to_2_limit,
// lowest_correlation_cutoff, partition_search_limit and
// texel_avg_error_limit, then calls expand_block_artifact_suppression().
// Note the parameter order: ydim comes before xdim.
//
// A mode that matches none of the presets leaves the preset values at their
// initial settings (partition limit clamped to 1, everything else zero).
//
// The per-preset `pcdiv` tables that used to be selected from ydim have been
// removed: the value was computed but never read.
void setup_ewp(ASTC_COMPRESS_MODE mode, int ydim, int xdim, error_weighting_params& ewp)
{
    float oplimit_autoset    = 0.0;
    float dblimit_autoset_2d = 0.0;
    float bmc_autoset        = 0.0;
    float mincorrel_autoset  = 0.0;

    int plimit_autoset   = -1;
    int maxiters_autoset = 0;

    // the dB limits shrink with the logarithm of the block's texel count
    const float log10_texels_2d = log((float)(xdim * ydim)) / log(10.0f);

    if (mode == ASTC_COMPRESS_VERY_FAST)
    {
        plimit_autoset     = 2;
        oplimit_autoset    = 1.0;
        dblimit_autoset_2d = MAX(70 - 35 * log10_texels_2d, 53 - 19 * log10_texels_2d);
        bmc_autoset        = 25;
        mincorrel_autoset  = 0.5;
        maxiters_autoset   = 1;
    }
    else if (mode == ASTC_COMPRESS_FAST)
    {
        plimit_autoset     = 4;
        oplimit_autoset    = 1.0;
        mincorrel_autoset  = 0.5;
        dblimit_autoset_2d = MAX(85 - 35 * log10_texels_2d, 63 - 19 * log10_texels_2d);
        bmc_autoset        = 50;
        maxiters_autoset   = 1;
    }
    else if (mode == ASTC_COMPRESS_MEDIUM)
    {
        plimit_autoset     = 25;
        oplimit_autoset    = 1.2f;
        mincorrel_autoset  = 0.75f;
        dblimit_autoset_2d = MAX(95 - 35 * log10_texels_2d, 70 - 19 * log10_texels_2d);
        bmc_autoset        = 75;
        maxiters_autoset   = 2;
    }
    else if (mode == ASTC_COMPRESS_THOROUGH)
    {
        plimit_autoset     = 100;
        oplimit_autoset    = 2.5f;
        mincorrel_autoset  = 0.95f;
        dblimit_autoset_2d = MAX(105 - 35 * log10_texels_2d, 77 - 19 * log10_texels_2d);
        bmc_autoset        = 95;
        maxiters_autoset   = 4;
    }
    else if (mode == ASTC_COMPRESS_EXHAUSTIVE)
    {
        plimit_autoset     = PARTITION_COUNT;
        oplimit_autoset    = 1000.0f;
        mincorrel_autoset  = 0.99f;
        dblimit_autoset_2d = 999.0f;
        bmc_autoset        = 100;
        maxiters_autoset   = 4;
    }

    ewp.max_refinement_iters = maxiters_autoset;

    // the preset gives the cutoff as a percentage
    ewp.block_mode_cutoff = bmc_autoset / 100.0f;

    ewp.partition_1_to_2_limit    = oplimit_autoset;
    ewp.lowest_correlation_cutoff = mincorrel_autoset;

    // keep the partition search limit within 1 .. PARTITION_COUNT
    int partitions_to_test = plimit_autoset;
    if (partitions_to_test < 1)
        partitions_to_test = 1;
    else if (partitions_to_test > PARTITION_COUNT)
        partitions_to_test = PARTITION_COUNT;
    ewp.partition_search_limit = partitions_to_test;

    // Turn the dB limit into an average squared error per texel on a
    // 16-bit scale; when HDR colour is forced the limit is zero instead.
    float texel_avg_error_limit_2d;
    if (rgb_force_use_of_hdr == 0)
    {
        texel_avg_error_limit_2d = pow(0.1f, dblimit_autoset_2d * 0.1f) * 65535.0f * 65535.0f;
    }
    else
    {
        texel_avg_error_limit_2d = 0.0f;
    }
    ewp.texel_avg_error_limit = texel_avg_error_limit_2d;

    expand_block_artifact_suppression(xdim, ydim, 1, &ewp);
}
|
||||
}
|
||||
|
||||
// Size in bytes of the ASTC data for a w x h image compressed with bw x bh
// blocks: 16 bytes for each block of the grid, with partial blocks at the
// right and bottom edges counted as whole blocks.
//
// Returns 0 when any argument is zero or negative. The arithmetic is done in
// size_t so that large images do not overflow int.
size_t astc_compressed_size(int w, int h, int bw, int bh)
{
    if (w <= 0 || h <= 0 || bw <= 0 || bh <= 0)
        return 0;

    const size_t block_w = static_cast<size_t>(bw);
    const size_t block_h = static_cast<size_t>(bh);

    const size_t nx = (static_cast<size_t>(w) + block_w - 1) / block_w;
    const size_t ny = (static_cast<size_t>(h) + block_h - 1) / block_h;

    return nx * ny * 16;
}
|
||||
|
||||
void astc_compress
|
||||
(
|
||||
int src_width,
|
||||
int src_height,
|
||||
const uint8_t* src_data,
|
||||
ASTC_CHANNELS src_channels,
|
||||
int src_stride,
|
||||
|
||||
int block_width,
|
||||
int block_height,
|
||||
ASTC_COMPRESS_MODE compress_mode,
|
||||
ASTC_DECODE_MODE decode_mode,
|
||||
uint8_t* dst_data
|
||||
)
|
||||
{
|
||||
init_tables();
|
||||
|
||||
error_weighting_params ewp;
|
||||
init_ewp(ewp);
|
||||
setup_ewp(compress_mode, block_width, block_height, ewp);
|
||||
|
||||
if (src_stride == 0)
|
||||
src_stride = src_width * 4;
|
||||
|
||||
encode_astc
|
||||
(
|
||||
src_data,
|
||||
src_stride,
|
||||
k_swizzles[src_channels],
|
||||
src_width, src_height,
|
||||
block_width, block_height,
|
||||
&ewp,
|
||||
(astc_decode_mode) decode_mode,
|
||||
dst_data
|
||||
);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// More direct version of the astc_codec_image routine, which operates on a
|
||||
// more conventional 2D image layout.
|
||||
void from_imageblock(int xdim, int ydim, const imageblock* pb, bool srgb, swizzlepattern swz, uint8_t* dst_data, int dst_stride)
|
||||
{
|
||||
const float* fptr = pb->orig_data;
|
||||
const uint8_t* nptr = pb->nan_texel;
|
||||
|
||||
for (int y = 0; y < ydim; y++)
|
||||
{
|
||||
for (int x = 0; x < xdim; x++)
|
||||
{
|
||||
if (*nptr)
|
||||
{
|
||||
// NaN-pixel, but we can't display it. Display purple instead.
|
||||
dst_data[4 * x + swz.r] = 0xFF;
|
||||
dst_data[4 * x + swz.g] = 0x00;
|
||||
dst_data[4 * x + swz.b] = 0xFF;
|
||||
dst_data[4 * x + swz.a] = 0xFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
float r = fptr[0];
|
||||
float g = fptr[1];
|
||||
float b = fptr[2];
|
||||
float a = fptr[3];
|
||||
|
||||
if (srgb)
|
||||
{
|
||||
if (r <= 0.0031308f)
|
||||
r = r * 12.92f;
|
||||
else if (r <= 1)
|
||||
r = 1.055f * pow(r, (1.0f / 2.4f)) - 0.055f;
|
||||
|
||||
if (g <= 0.0031308f)
|
||||
g = g * 12.92f;
|
||||
else if (g <= 1)
|
||||
g = 1.055f * pow(g, (1.0f / 2.4f)) - 0.055f;
|
||||
|
||||
if (b <= 0.0031308f)
|
||||
b = b * 12.92f;
|
||||
else if (b <= 1)
|
||||
b = 1.055f * pow(b, (1.0f / 2.4f)) - 0.055f;
|
||||
}
|
||||
|
||||
// clamp to [0,1]
|
||||
if (r > 1.0f)
|
||||
r = 1.0f;
|
||||
if (g > 1.0f)
|
||||
g = 1.0f;
|
||||
if (b > 1.0f)
|
||||
b = 1.0f;
|
||||
if (a > 1.0f)
|
||||
a = 1.0f;
|
||||
|
||||
// pack the data
|
||||
dst_data[4 * x + swz.r] = uint8_t(floorf(r * 255.0f + 0.5f));
|
||||
dst_data[4 * x + swz.g] = uint8_t(floorf(g * 255.0f + 0.5f));
|
||||
dst_data[4 * x + swz.b] = uint8_t(floorf(b * 255.0f + 0.5f));
|
||||
dst_data[4 * x + swz.a] = uint8_t(floorf(a * 255.0f + 0.5f));
|
||||
}
|
||||
|
||||
fptr += 4;
|
||||
nptr++;
|
||||
}
|
||||
|
||||
dst_data += dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void astc_decompress
|
||||
(
|
||||
const uint8_t* src_data,
|
||||
int xdim,
|
||||
int ydim,
|
||||
ASTC_DECODE_MODE decode_mode,
|
||||
|
||||
int xsize,
|
||||
int ysize,
|
||||
uint8_t* dst_data,
|
||||
ASTC_CHANNELS dst_channels,
|
||||
int dst_stride
|
||||
)
|
||||
{
|
||||
init_tables();
|
||||
|
||||
int xblocks = (xsize + xdim - 1) / xdim;
|
||||
int yblocks = (ysize + ydim - 1) / ydim;
|
||||
|
||||
if (dst_stride == 0)
|
||||
dst_stride = 4 * xsize;
|
||||
|
||||
imageblock pb;
|
||||
|
||||
for (int y = 0; y < yblocks; y++)
|
||||
{
|
||||
int ypos = y * ydim;
|
||||
int clamp_ydim = MIN(ysize - ypos, ydim);
|
||||
|
||||
uint8_t* dst_row = dst_data + ypos * dst_stride;
|
||||
|
||||
for (int x = 0; x < xblocks; x++)
|
||||
{
|
||||
int xpos = x * xdim;
|
||||
int clamp_xdim = MIN(xsize - xpos, xdim);
|
||||
|
||||
physical_compressed_block pcb = *(const physical_compressed_block *) src_data;
|
||||
symbolic_compressed_block scb;
|
||||
|
||||
physical_to_symbolic(xdim, ydim, 1, pcb, &scb);
|
||||
decompress_symbolic_block((astc_decode_mode) decode_mode, xdim, ydim, 1, xpos, ypos, 0, &scb, &pb);
|
||||
|
||||
from_imageblock(clamp_xdim, clamp_ydim, &pb, decode_mode == ASTC_DECODE_LDR_SRGB, k_swizzles[dst_channels], dst_row + xpos * 4, dst_stride);
|
||||
|
||||
src_data += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Relevant astc source files. These aren't set up for a bulk build yet though.
|
||||
#ifdef DISABLED
|
||||
#include "astc_block_sizes2.cpp"
|
||||
#include "astc_color_quantize.cpp"
|
||||
#include "astc_color_unquantize.cpp"
|
||||
#include "astc_compress_symbolic.cpp"
|
||||
#include "astc_compute_variance.cpp"
|
||||
#include "astc_decompress_symbolic.cpp"
|
||||
#include "astc_encoding_choice_error.cpp"
|
||||
#include "astc_find_best_partitioning.cpp"
|
||||
#include "astc_ideal_endpoints_and_weights.cpp"
|
||||
#include "astc_imageblock.cpp"
|
||||
#include "astc_integer_sequence.cpp"
|
||||
#include "astc_kmeans_partitioning.cpp"
|
||||
#include "astc_partition_tables.cpp"
|
||||
#include "astc_percentile_tables.cpp"
|
||||
#include "astc_pick_best_endpoint_format.cpp"
|
||||
#include "astc_quantization.cpp"
|
||||
#include "astc_symbolic_physical.cpp"
|
||||
#include "astc_weight_align.cpp"
|
||||
#include "astc_weight_quant_xfer_tables.cpp"
|
||||
#include "mathlib.cpp"
|
||||
#include "softfloat.cpp"
|
||||
#endif
|
||||
73
3rdparty/astc/astc_lib.h
vendored
Normal file
73
3rdparty/astc/astc_lib.h
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @author Andrew Willmott
|
||||
*
|
||||
* @brief Library api for astc codec, to be used as an alternative to astc_toplevel.cpp
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef ASTC_LIB_H
|
||||
#define ASTC_LIB_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Encoder presets that trade compression quality against speed.
enum ASTC_COMPRESS_MODE
{
	ASTC_COMPRESS_VERY_FAST  = 0,
	ASTC_COMPRESS_FAST       = 1,
	ASTC_COMPRESS_MEDIUM     = 2,
	ASTC_COMPRESS_THOROUGH   = 3,
	ASTC_COMPRESS_EXHAUSTIVE = 4
};
|
||||
|
||||
// Decode target the texture is intended for. astc_lib.cpp casts these values
// straight to the codec's internal astc_decode_mode, so the numeric values
// must not be changed.
enum ASTC_DECODE_MODE
{
	ASTC_DECODE_LDR_SRGB   = 0,	// texture will be decompressed to 8-bit SRGB
	ASTC_DECODE_LDR_LINEAR = 1,	// texture will be decompressed to 8-bit linear
	ASTC_DECODE_HDR        = 2	// texture will be decompressed to 16-bit linear
};
|
||||
|
||||
// Channel order of the uncompressed image. astc_lib.cpp uses the value as an
// index into its swizzle table, so the numeric values must not be changed.
enum ASTC_CHANNELS
{
	ASTC_RGBA = 0,
	ASTC_BGRA = 1
};
|
||||
|
||||
|
||||
size_t astc_compressed_size(int width, int height, int block_width, int block_height);
//!< Returns size of the compressed data for a width x height source image, assuming the given block size.
//!< Note the argument order: image dimensions first, block dimensions last (this matches the definition in astc_lib.cpp).
|
||||
|
||||
void astc_compress
|
||||
(
|
||||
int src_width,
|
||||
int src_height,
|
||||
const uint8_t* src_data,
|
||||
ASTC_CHANNELS src_channels,
|
||||
int src_stride,
|
||||
|
||||
int block_width,
|
||||
int block_height,
|
||||
ASTC_COMPRESS_MODE compress_mode,
|
||||
ASTC_DECODE_MODE decode_mode,
|
||||
uint8_t* dst_data
|
||||
);
|
||||
//!< Compress 8-bit rgba source image into dst_data (expected to be of size astc_compressed_size(...))
|
||||
|
||||
void astc_decompress
|
||||
(
|
||||
const uint8_t* src_data,
|
||||
int block_width,
|
||||
int block_height,
|
||||
ASTC_DECODE_MODE decode_mode,
|
||||
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
uint8_t* dst_data,
|
||||
ASTC_CHANNELS dst_channels,
|
||||
int dst_stride
|
||||
);
|
||||
//!< Decompress astc source image into 8-bit rgba destination image.
|
||||
|
||||
#endif
|
||||
|
||||
323
3rdparty/astc/astc_partition_tables.cpp
vendored
Normal file
323
3rdparty/astc/astc_partition_tables.cpp
vendored
Normal file
@@ -0,0 +1,323 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Functions to generate partition tables for ASTC.
|
||||
*
|
||||
* We generate tables only for the block sizes that have actually been
|
||||
* specified to the codec.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
// Cache of generated partition tables, one slot per block size. The slot index
// is xdim + 16 * ydim + 256 * zdim. A slot stays NULL until get_partition_table()
// first asks for that block size; it then points to a five-entry array in which
// entry [n] holds the partitionings that use n partitions (entry [0] is NULL).
static partition_info **partition_tables[4096];
|
||||
|
||||
/*
	Build the canonical form of a partition pattern.

	Partition labels are renumbered in order of first appearance, and the new
	label of texel i is stored as a 2-bit field, 32 texels per uint64_t. Two
	patterns that differ only in how their partitions are numbered therefore
	produce the same canonical form.

	The largest possible such representation is 432 bits, which fits in 7 uint64_t values.
*/
static void gen_canonicalized_partition_table(int texel_count, const uint8_t * partition_table, uint64_t canonicalized[7])
{
	int relabel[4] = { -1, -1, -1, -1 };
	int labels_used = 0;

	for (int word = 0; word < 7; word++)
		canonicalized[word] = 0;

	for (int texel = 0; texel < texel_count; texel++)
	{
		const int label = partition_table[texel];

		// first time this label is seen: give it the next canonical number
		if (relabel[label] == -1)
			relabel[label] = labels_used++;

		const uint64_t canonical_label = relabel[label];
		const int word = texel >> 5;
		const int bit = 2 * (texel & 0x1F);
		canonicalized[word] |= canonical_label << bit;
	}
}
|
||||
|
||||
|
||||
// Returns 1 if the two canonical forms are equal in all seven words, 0 otherwise.
static int compare_canonicalized_partition_tables(const uint64_t part1[7], const uint64_t part2[7])
{
	for (int word = 0; word < 7; word++)
	{
		if (part1[word] != part2[word])
			return 0;
	}
	return 1;
}
|
||||
|
||||
|
||||
/*
|
||||
For a partition table, detect partitionings that are equivalent, then mark them as invalid. This reduces the number of partitions that the codec has to consider and thus improves encode
|
||||
performance. */
|
||||
static void partition_table_zap_equal_elements(int xdim, int ydim, int zdim, partition_info * pi)
|
||||
{
|
||||
int partition_tables_zapped = 0;
|
||||
|
||||
int texel_count = xdim * ydim * zdim;
|
||||
|
||||
int i, j;
|
||||
uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];
|
||||
|
||||
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
|
||||
}
|
||||
|
||||
for (i = 0; i < PARTITION_COUNT; i++)
|
||||
{
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
|
||||
{
|
||||
pi[i].partition_count = 0;
|
||||
partition_tables_zapped++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete[]canonicalizeds;
|
||||
}
|
||||
|
||||
|
||||
// 32-bit integer mixing function; select_partition() uses it to turn a
// partition seed into pseudo-random bits. All arithmetic wraps modulo 2^32.
uint32_t hash52(uint32_t inp)
{
	uint32_t mixed = inp;

	mixed = mixed ^ (mixed >> 15);
	mixed = mixed * 0xEEDE0891;	// (2^4+1)*(2^7+1)*(2^17-1)
	mixed = mixed ^ (mixed >> 5);
	mixed = mixed + (mixed << 16);
	mixed = mixed ^ (mixed >> 7);
	mixed = mixed ^ (mixed >> 3);
	mixed = mixed ^ (mixed << 6);
	mixed = mixed ^ (mixed >> 17);

	return mixed;
}
|
||||
|
||||
|
||||
|
||||
int select_partition(int seed, int x, int y, int z, int partitioncount, int small_block)
|
||||
{
|
||||
if (small_block)
|
||||
{
|
||||
x <<= 1;
|
||||
y <<= 1;
|
||||
z <<= 1;
|
||||
}
|
||||
|
||||
seed += (partitioncount - 1) * 1024;
|
||||
|
||||
uint32_t rnum = hash52(seed);
|
||||
|
||||
uint8_t seed1 = rnum & 0xF;
|
||||
uint8_t seed2 = (rnum >> 4) & 0xF;
|
||||
uint8_t seed3 = (rnum >> 8) & 0xF;
|
||||
uint8_t seed4 = (rnum >> 12) & 0xF;
|
||||
uint8_t seed5 = (rnum >> 16) & 0xF;
|
||||
uint8_t seed6 = (rnum >> 20) & 0xF;
|
||||
uint8_t seed7 = (rnum >> 24) & 0xF;
|
||||
uint8_t seed8 = (rnum >> 28) & 0xF;
|
||||
uint8_t seed9 = (rnum >> 18) & 0xF;
|
||||
uint8_t seed10 = (rnum >> 22) & 0xF;
|
||||
uint8_t seed11 = (rnum >> 26) & 0xF;
|
||||
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
|
||||
|
||||
// squaring all the seeds in order to bias their distribution
|
||||
// towards lower values.
|
||||
seed1 *= seed1;
|
||||
seed2 *= seed2;
|
||||
seed3 *= seed3;
|
||||
seed4 *= seed4;
|
||||
seed5 *= seed5;
|
||||
seed6 *= seed6;
|
||||
seed7 *= seed7;
|
||||
seed8 *= seed8;
|
||||
seed9 *= seed9;
|
||||
seed10 *= seed10;
|
||||
seed11 *= seed11;
|
||||
seed12 *= seed12;
|
||||
|
||||
|
||||
int sh1, sh2, sh3;
|
||||
if (seed & 1)
|
||||
{
|
||||
sh1 = (seed & 2 ? 4 : 5);
|
||||
sh2 = (partitioncount == 3 ? 6 : 5);
|
||||
}
|
||||
else
|
||||
{
|
||||
sh1 = (partitioncount == 3 ? 6 : 5);
|
||||
sh2 = (seed & 2 ? 4 : 5);
|
||||
}
|
||||
sh3 = (seed & 0x10) ? sh1 : sh2;
|
||||
|
||||
seed1 >>= sh1;
|
||||
seed2 >>= sh2;
|
||||
seed3 >>= sh1;
|
||||
seed4 >>= sh2;
|
||||
seed5 >>= sh1;
|
||||
seed6 >>= sh2;
|
||||
seed7 >>= sh1;
|
||||
seed8 >>= sh2;
|
||||
|
||||
seed9 >>= sh3;
|
||||
seed10 >>= sh3;
|
||||
seed11 >>= sh3;
|
||||
seed12 >>= sh3;
|
||||
|
||||
|
||||
|
||||
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
|
||||
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
|
||||
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
|
||||
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
|
||||
|
||||
|
||||
// apply the saw
|
||||
a &= 0x3F;
|
||||
b &= 0x3F;
|
||||
c &= 0x3F;
|
||||
d &= 0x3F;
|
||||
|
||||
// remove some of the components if we are to output < 4 partitions.
|
||||
if (partitioncount <= 3)
|
||||
d = 0;
|
||||
if (partitioncount <= 2)
|
||||
c = 0;
|
||||
if (partitioncount <= 1)
|
||||
b = 0;
|
||||
|
||||
int partition;
|
||||
if (a >= b && a >= c && a >= d)
|
||||
partition = 0;
|
||||
else if (b >= c && b >= d)
|
||||
partition = 1;
|
||||
else if (c >= d)
|
||||
partition = 2;
|
||||
else
|
||||
partition = 3;
|
||||
return partition;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void generate_one_partition_table(int xdim, int ydim, int zdim, int partition_count, int partition_index, partition_info * pt)
|
||||
{
|
||||
int small_block = (xdim * ydim * zdim) < 32;
|
||||
|
||||
uint8_t *partition_of_texel = pt->partition_of_texel;
|
||||
int x, y, z, i;
|
||||
|
||||
|
||||
for (z = 0; z < zdim; z++)
|
||||
for (y = 0; y < ydim; y++)
|
||||
for (x = 0; x < xdim; x++)
|
||||
{
|
||||
uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
|
||||
*partition_of_texel++ = part;
|
||||
}
|
||||
|
||||
|
||||
int texels_per_block = xdim * ydim * zdim;
|
||||
|
||||
int counts[4];
|
||||
for (i = 0; i < 4; i++)
|
||||
counts[i] = 0;
|
||||
|
||||
for (i = 0; i < texels_per_block; i++)
|
||||
{
|
||||
int partition = pt->partition_of_texel[i];
|
||||
pt->texels_of_partition[partition][counts[partition]++] = i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
pt->texels_per_partition[i] = counts[i];
|
||||
|
||||
if (counts[0] == 0)
|
||||
pt->partition_count = 0;
|
||||
else if (counts[1] == 0)
|
||||
pt->partition_count = 1;
|
||||
else if (counts[2] == 0)
|
||||
pt->partition_count = 2;
|
||||
else if (counts[3] == 0)
|
||||
pt->partition_count = 3;
|
||||
else
|
||||
pt->partition_count = 4;
|
||||
|
||||
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
pt->coverage_bitmaps[i] = 0ULL;
|
||||
|
||||
const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
|
||||
int texels_to_process = bsd->texelcount_for_bitmap_partitioning;
|
||||
for (i = 0; i < texels_to_process; i++)
|
||||
{
|
||||
int idx = bsd->texels_for_bitmap_partitioning[i];
|
||||
pt->coverage_bitmaps[pt->partition_of_texel[idx]] |= 1ULL << i;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void generate_partition_tables(int xdim, int ydim, int zdim)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
||||
partition_info *one_partition = new partition_info;
|
||||
partition_info *two_partitions = new partition_info[1024];
|
||||
partition_info *three_partitions = new partition_info[1024];
|
||||
partition_info *four_partitions = new partition_info[1024];
|
||||
|
||||
partition_info **partition_table = new partition_info *[5];
|
||||
partition_table[0] = NULL;
|
||||
partition_table[1] = one_partition;
|
||||
partition_table[2] = two_partitions;
|
||||
partition_table[3] = three_partitions;
|
||||
partition_table[4] = four_partitions;
|
||||
|
||||
generate_one_partition_table(xdim, ydim, zdim, 1, 0, one_partition);
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
generate_one_partition_table(xdim, ydim, zdim, 2, i, two_partitions + i);
|
||||
generate_one_partition_table(xdim, ydim, zdim, 3, i, three_partitions + i);
|
||||
generate_one_partition_table(xdim, ydim, zdim, 4, i, four_partitions + i);
|
||||
}
|
||||
|
||||
partition_table_zap_equal_elements(xdim, ydim, zdim, two_partitions);
|
||||
partition_table_zap_equal_elements(xdim, ydim, zdim, three_partitions);
|
||||
partition_table_zap_equal_elements(xdim, ydim, zdim, four_partitions);
|
||||
|
||||
partition_tables[xdim + 16 * ydim + 256 * zdim] = partition_table;
|
||||
}
|
||||
|
||||
|
||||
// Returns the partition table for the given block size and partition count,
// generating the tables for that block size on first use.
const partition_info *get_partition_table(int xdim, int ydim, int zdim, int partition_count)
{
	const int slot = xdim + 16 * ydim + 256 * zdim;

	if (!partition_tables[slot])
		generate_partition_tables(xdim, ydim, zdim);

	partition_info **per_count = partition_tables[slot];
	return per_count[partition_count];
}
|
||||
4768
3rdparty/astc/astc_percentile_tables.cpp
vendored
Normal file
4768
3rdparty/astc/astc_percentile_tables.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
938
3rdparty/astc/astc_pick_best_endpoint_format.cpp
vendored
Normal file
938
3rdparty/astc/astc_pick_best_endpoint_format.cpp
vendored
Normal file
@@ -0,0 +1,938 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Functions to pick the best ASTC endpoint format for a given block.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/*
|
||||
functions to determine, for a given partitioning, which color endpoint formats are the best to use.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
// for a given partition, compute for every (integer-component-count, quantization-level)
|
||||
// the color error.
|
||||
|
||||
|
||||
static void compute_color_error_for_every_integer_count_and_quantization_level(int encode_hdr_rgb, // 1 = perform HDR encoding, 0 = perform LDR encoding.
|
||||
int encode_hdr_alpha, int partition_index, const partition_info * pi,
|
||||
const encoding_choice_errors * eci, // pointer to the structure for the CURRENT partition.
|
||||
const endpoints * ep, float4 error_weightings[4],
|
||||
// arrays to return results back through.
|
||||
float best_error[21][4], int format_of_choice[21][4])
|
||||
{
|
||||
int i, j;
|
||||
int partition_size = pi->texels_per_partition[partition_index];
|
||||
|
||||
static const float baseline_quant_error[21] = {
|
||||
(65536.0f * 65536.0f / 18.0f), // 2 values, 1 step
|
||||
(65536.0f * 65536.0f / 18.0f) / (2 * 2), // 3 values, 2 steps
|
||||
(65536.0f * 65536.0f / 18.0f) / (3 * 3), // 4 values, 3 steps
|
||||
(65536.0f * 65536.0f / 18.0f) / (4 * 4), // 5 values
|
||||
(65536.0f * 65536.0f / 18.0f) / (5 * 5),
|
||||
(65536.0f * 65536.0f / 18.0f) / (7 * 7),
|
||||
(65536.0f * 65536.0f / 18.0f) / (9 * 9),
|
||||
(65536.0f * 65536.0f / 18.0f) / (11 * 11),
|
||||
(65536.0f * 65536.0f / 18.0f) / (15 * 15),
|
||||
(65536.0f * 65536.0f / 18.0f) / (19 * 19),
|
||||
(65536.0f * 65536.0f / 18.0f) / (23 * 23),
|
||||
(65536.0f * 65536.0f / 18.0f) / (31 * 31),
|
||||
(65536.0f * 65536.0f / 18.0f) / (39 * 39),
|
||||
(65536.0f * 65536.0f / 18.0f) / (47 * 47),
|
||||
(65536.0f * 65536.0f / 18.0f) / (63 * 63),
|
||||
(65536.0f * 65536.0f / 18.0f) / (79 * 79),
|
||||
(65536.0f * 65536.0f / 18.0f) / (95 * 95),
|
||||
(65536.0f * 65536.0f / 18.0f) / (127 * 127),
|
||||
(65536.0f * 65536.0f / 18.0f) / (159 * 159),
|
||||
(65536.0f * 65536.0f / 18.0f) / (191 * 191),
|
||||
(65536.0f * 65536.0f / 18.0f) / (255 * 255)
|
||||
};
|
||||
|
||||
float4 ep0 = ep->endpt0[partition_index];
|
||||
float4 ep1 = ep->endpt1[partition_index];
|
||||
|
||||
float ep0_max = MAX(MAX(ep0.x, ep0.y), ep0.z);
|
||||
float ep0_min = MIN(MIN(ep0.x, ep0.y), ep0.z);
|
||||
float ep1_max = MAX(MAX(ep1.x, ep1.y), ep1.z);
|
||||
float ep1_min = MIN(MIN(ep1.x, ep1.y), ep1.z);
|
||||
|
||||
ep0_min = MAX(ep0_min, 0.0f);
|
||||
ep1_min = MAX(ep1_min, 0.0f);
|
||||
ep0_max = MAX(ep0_max, 1e-10f);
|
||||
ep1_max = MAX(ep1_max, 1e-10f);
|
||||
|
||||
float4 error_weight = error_weightings[partition_index];
|
||||
|
||||
float error_weight_rgbsum = error_weight.x + error_weight.y + error_weight.z;
|
||||
|
||||
float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f;
|
||||
float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f;
|
||||
|
||||
// it is possible to get endpoint colors significantly outside [0,upper-limit]
|
||||
// even if the input data are safely contained in [0,upper-limit];
|
||||
// we need to add an error term for this situation,
|
||||
float4 ep0_range_error_high;
|
||||
float4 ep1_range_error_high;
|
||||
float4 ep0_range_error_low;
|
||||
float4 ep1_range_error_low;
|
||||
|
||||
ep0_range_error_high.x = MAX(0.0f, ep0.x - range_upper_limit_rgb);
|
||||
ep0_range_error_high.y = MAX(0.0f, ep0.y - range_upper_limit_rgb);
|
||||
ep0_range_error_high.z = MAX(0.0f, ep0.z - range_upper_limit_rgb);
|
||||
ep0_range_error_high.w = MAX(0.0f, ep0.w - range_upper_limit_alpha);
|
||||
ep1_range_error_high.x = MAX(0.0f, ep1.x - range_upper_limit_rgb);
|
||||
ep1_range_error_high.y = MAX(0.0f, ep1.y - range_upper_limit_rgb);
|
||||
ep1_range_error_high.z = MAX(0.0f, ep1.z - range_upper_limit_rgb);
|
||||
ep1_range_error_high.w = MAX(0.0f, ep1.w - range_upper_limit_alpha);
|
||||
|
||||
ep0_range_error_low.x = MIN(0.0f, ep0.x);
|
||||
ep0_range_error_low.y = MIN(0.0f, ep0.y);
|
||||
ep0_range_error_low.z = MIN(0.0f, ep0.z);
|
||||
ep0_range_error_low.w = MIN(0.0f, ep0.w);
|
||||
ep1_range_error_low.x = MIN(0.0f, ep1.x);
|
||||
ep1_range_error_low.y = MIN(0.0f, ep1.y);
|
||||
ep1_range_error_low.z = MIN(0.0f, ep1.z);
|
||||
ep1_range_error_low.w = MIN(0.0f, ep1.w);
|
||||
|
||||
float4 sum_range_error =
|
||||
(ep0_range_error_low * ep0_range_error_low) + (ep1_range_error_low * ep1_range_error_low) + (ep0_range_error_high * ep0_range_error_high) + (ep1_range_error_high * ep1_range_error_high);
|
||||
float rgb_range_error = dot(sum_range_error.xyz, error_weight.xyz) * 0.5f * partition_size;
|
||||
float alpha_range_error = sum_range_error.w * error_weight.w * 0.5f * partition_size;
|
||||
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
printf("%s : partition=%d\nrgb-error_wt=%f alpha_error_wt=%f\n", __func__, partition_index, error_weight_rgbsum, error_weight.w);
|
||||
|
||||
printf("ep0 = %f %f %f %f\n", ep0.x, ep0.y, ep0.z, ep0.w);
|
||||
printf("ep1 = %f %f %f %f\n", ep1.x, ep1.y, ep1.z, ep1.w);
|
||||
|
||||
|
||||
printf("rgb_range_error = %f, alpha_range_error = %f\n", rgb_range_error, alpha_range_error);
|
||||
|
||||
printf("rgb-luma-error: %f\n", eci->rgb_luma_error);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (encode_hdr_rgb)
|
||||
{
|
||||
|
||||
// collect some statistics
|
||||
float af, cf;
|
||||
if (ep1.x > ep1.y && ep1.x > ep1.z)
|
||||
{
|
||||
af = ep1.x;
|
||||
cf = ep1.x - ep0.x;
|
||||
}
|
||||
else if (ep1.y > ep1.z)
|
||||
{
|
||||
af = ep1.y;
|
||||
cf = ep1.y - ep0.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
af = ep1.z;
|
||||
cf = ep1.z - ep0.z;
|
||||
}
|
||||
|
||||
float bf = af - ep1_min; // estimate of color-component spread in high endpoint color
|
||||
float3 prd = ep1.xyz - float3(cf, cf, cf);
|
||||
float3 pdif = prd - ep0.xyz;
|
||||
// estimate of color-component spread in low endpoint color
|
||||
float df = MAX(MAX(fabs(pdif.x), fabs(pdif.y)), fabs(pdif.z));
|
||||
|
||||
int b = (int)bf;
|
||||
int c = (int)cf;
|
||||
int d = (int)df;
|
||||
|
||||
|
||||
// determine which one of the 6 submodes is likely to be used in
|
||||
// case of an RGBO-mode
|
||||
int rgbo_mode = 5; // 7 bits per component
|
||||
// mode 4: 8 7 6
|
||||
if (b < 32768 && c < 16384)
|
||||
rgbo_mode = 4;
|
||||
// mode 3: 9 6 7
|
||||
if (b < 8192 && c < 16384)
|
||||
rgbo_mode = 3;
|
||||
// mode 2: 10 5 8
|
||||
if (b < 2048 && c < 16384)
|
||||
rgbo_mode = 2;
|
||||
// mode 1: 11 6 5
|
||||
if (b < 2048 && c < 1024)
|
||||
rgbo_mode = 1;
|
||||
// mode 0: 11 5 7
|
||||
if (b < 1024 && c < 4096)
|
||||
rgbo_mode = 0;
|
||||
|
||||
// determine which one of the 9 submodes is likely to be used in
|
||||
// case of an RGB-mode.
|
||||
int rgb_mode = 8; // 8 bits per component, except 7 bits for blue
|
||||
|
||||
// mode 0: 9 7 6 7
|
||||
if (b < 16384 && c < 8192 && d < 8192)
|
||||
rgb_mode = 0;
|
||||
// mode 1: 9 8 6 6
|
||||
if (b < 32768 && c < 8192 && d < 4096)
|
||||
rgb_mode = 1;
|
||||
// mode 2: 10 6 7 7
|
||||
if (b < 4096 && c < 8192 && d < 4096)
|
||||
rgb_mode = 2;
|
||||
// mode 3: 10 7 7 6
|
||||
if (b < 8192 && c < 8192 && d < 2048)
|
||||
rgb_mode = 3;
|
||||
// mode 4: 11 8 6 5
|
||||
if (b < 8192 && c < 2048 && d < 512)
|
||||
rgb_mode = 4;
|
||||
// mode 5: 11 6 8 6
|
||||
if (b < 2048 && c < 8192 && d < 1024)
|
||||
rgb_mode = 5;
|
||||
// mode 6: 12 7 7 5
|
||||
if (b < 2048 && c < 2048 && d < 256)
|
||||
rgb_mode = 6;
|
||||
// mode 7: 12 6 7 6
|
||||
if (b < 1024 && c < 2048 && d < 512)
|
||||
rgb_mode = 7;
|
||||
|
||||
|
||||
static const float rgbo_error_scales[6] = { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f };
|
||||
static const float rgb_error_scales[9] = { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f };
|
||||
|
||||
float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // empirically determined ....
|
||||
float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // empirically determined ....
|
||||
|
||||
|
||||
float lum_high = (ep1.x + ep1.y + ep1.z) * (1.0f / 3.0f);
|
||||
float lum_low = (ep0.x + ep0.y + ep0.z) * (1.0f / 3.0f);
|
||||
float lumdif = lum_high - lum_low;
|
||||
float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f;
|
||||
|
||||
mode23mult *= 0.0005f; // empirically determined ....
|
||||
|
||||
|
||||
|
||||
// pick among the available HDR endpoint modes
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
best_error[i][3] = 1e30f;
|
||||
format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
|
||||
best_error[i][2] = 1e30f;
|
||||
format_of_choice[i][2] = FMT_HDR_RGB;
|
||||
best_error[i][1] = 1e30f;
|
||||
format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
|
||||
best_error[i][0] = 1e30f;
|
||||
format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
|
||||
}
|
||||
|
||||
|
||||
for (i = 8; i < 21; i++)
|
||||
{
|
||||
// base_quant_error should depend on the scale-factor that would be used
|
||||
// during actual encode of the color value.
|
||||
|
||||
float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
|
||||
float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
|
||||
float alpha_quantization_error = error_weight.w * base_quant_error * 2.0f;
|
||||
float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
|
||||
#endif
|
||||
|
||||
// for 8 integers, we have two encodings: one with HDR alpha and another one
|
||||
// with LDR alpha.
|
||||
|
||||
float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error;
|
||||
best_error[i][3] = full_hdr_rgba_error;
|
||||
format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
|
||||
|
||||
// for 6 integers, we have one HDR-RGB encoding
|
||||
float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci->alpha_drop_error;
|
||||
best_error[i][2] = full_hdr_rgb_error;
|
||||
format_of_choice[i][2] = FMT_HDR_RGB;
|
||||
|
||||
// for 4 integers, we have one HDR-RGB-Scale encoding
|
||||
float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci->alpha_drop_error + eci->rgb_luma_error;
|
||||
|
||||
best_error[i][1] = hdr_rgb_scale_error;
|
||||
format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
|
||||
|
||||
// for 2 integers, we assume luminance-with-large-range
|
||||
float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci->alpha_drop_error + eci->luminance_error;
|
||||
best_error[i][0] = hdr_luminance_error;
|
||||
format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
printf("(hdr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
best_error[i][3] = 1e30f;
|
||||
best_error[i][2] = 1e30f;
|
||||
best_error[i][1] = 1e30f;
|
||||
best_error[i][0] = 1e30f;
|
||||
|
||||
format_of_choice[i][3] = FMT_RGBA;
|
||||
format_of_choice[i][2] = FMT_RGB;
|
||||
format_of_choice[i][1] = FMT_RGB_SCALE;
|
||||
format_of_choice[i][0] = FMT_LUMINANCE;
|
||||
}
|
||||
|
||||
|
||||
// pick among the available LDR endpoint modes
|
||||
for (i = 4; i < 21; i++)
|
||||
{
|
||||
float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
|
||||
float rgb_quantization_error = error_weight_rgbsum * base_quant_error;
|
||||
float alpha_quantization_error = error_weight.w * base_quant_error;
|
||||
float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
|
||||
#endif
|
||||
|
||||
// for 8 integers, the available encodings are:
|
||||
// full LDR RGB-Alpha
|
||||
float full_ldr_rgba_error = rgba_quantization_error;
|
||||
if (eci->can_blue_contract)
|
||||
full_ldr_rgba_error *= 0.625f;
|
||||
if (eci->can_offset_encode && i <= 18)
|
||||
full_ldr_rgba_error *= 0.5f;
|
||||
full_ldr_rgba_error += rgb_range_error + alpha_range_error;
|
||||
|
||||
best_error[i][3] = full_ldr_rgba_error;
|
||||
format_of_choice[i][3] = FMT_RGBA;
|
||||
|
||||
// for 6 integers, we have:
|
||||
// - an LDR-RGB encoding
|
||||
// - an RGBS + Alpha encoding (LDR)
|
||||
|
||||
float full_ldr_rgb_error = rgb_quantization_error;
|
||||
if (eci->can_blue_contract)
|
||||
full_ldr_rgb_error *= 0.5f;
|
||||
if (eci->can_offset_encode && i <= 18)
|
||||
full_ldr_rgb_error *= 0.25f;
|
||||
full_ldr_rgb_error += eci->alpha_drop_error + rgb_range_error;
|
||||
|
||||
float rgbs_alpha_error = rgba_quantization_error + eci->rgb_scale_error + rgb_range_error + alpha_range_error;
|
||||
|
||||
if (rgbs_alpha_error < full_ldr_rgb_error)
|
||||
{
|
||||
best_error[i][2] = rgbs_alpha_error;
|
||||
format_of_choice[i][2] = FMT_RGB_SCALE_ALPHA;
|
||||
}
|
||||
else
|
||||
{
|
||||
best_error[i][2] = full_ldr_rgb_error;
|
||||
format_of_choice[i][2] = FMT_RGB;
|
||||
}
|
||||
|
||||
|
||||
// for 4 integers, we have a Luminance-Alpha encoding and the RGBS encoding
|
||||
float ldr_rgbs_error = rgb_quantization_error + eci->alpha_drop_error + eci->rgb_scale_error + rgb_range_error;
|
||||
|
||||
float lum_alpha_error = rgba_quantization_error + eci->luminance_error + rgb_range_error + alpha_range_error;
|
||||
|
||||
if (ldr_rgbs_error < lum_alpha_error)
|
||||
{
|
||||
best_error[i][1] = ldr_rgbs_error;
|
||||
format_of_choice[i][1] = FMT_RGB_SCALE;
|
||||
}
|
||||
else
|
||||
{
|
||||
best_error[i][1] = lum_alpha_error;
|
||||
format_of_choice[i][1] = FMT_LUMINANCE_ALPHA;
|
||||
}
|
||||
|
||||
|
||||
// for 2 integers, we have a Luminance-encoding and an Alpha-encoding.
|
||||
float luminance_error = rgb_quantization_error + eci->alpha_drop_error + eci->luminance_error + rgb_range_error;
|
||||
|
||||
best_error[i][0] = luminance_error;
|
||||
format_of_choice[i][0] = FMT_LUMINANCE;
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
printf(" (ldr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// for 1 partition, find the best combination (one format + a quantization level) for a given bitcount
|
||||
|
||||
static void one_partition_find_best_combination_for_bitcount(float combined_best_error[21][4],
|
||||
int formats_of_choice[21][4], int bits_available, int *best_quantization_level, int *best_formats, float *error_of_best_combination)
|
||||
{
|
||||
int i;
|
||||
int best_integer_count = -1;
|
||||
float best_integer_count_error = 1e20f;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
// compute the quantization level for a given number of integers and a given number of bits.
|
||||
int quantization_level = quantization_mode_table[i + 1][bits_available];
|
||||
if (quantization_level == -1)
|
||||
continue; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
|
||||
if (combined_best_error[quantization_level][i] < best_integer_count_error)
|
||||
{
|
||||
best_integer_count_error = combined_best_error[quantization_level][i];
|
||||
best_integer_count = i;
|
||||
}
|
||||
}
|
||||
|
||||
int ql = quantization_mode_table[best_integer_count + 1][bits_available];
|
||||
|
||||
*best_quantization_level = ql;
|
||||
*error_of_best_combination = best_integer_count_error;
|
||||
if (ql >= 0)
|
||||
*best_formats = formats_of_choice[ql][best_integer_count];
|
||||
else
|
||||
*best_formats = FMT_LUMINANCE;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// for 2 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
|
||||
|
||||
static void two_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[2][21][4], // indexed by (partition, quant-level, integer-pair-count-minus-1)
|
||||
int format_of_choice[2][21][4],
|
||||
float combined_best_error[21][7], // indexed by (quant-level, integer-pair-count-minus-2)
|
||||
int formats_of_choice[21][7][2])
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 21; i++)
|
||||
for (j = 0; j < 7; j++)
|
||||
combined_best_error[i][j] = 1e30f;
|
||||
|
||||
int quant;
|
||||
for (quant = 5; quant < 21; quant++)
|
||||
{
|
||||
for (i = 0; i < 4; i++) // integer-count for first endpoint-pair
|
||||
{
|
||||
for (j = 0; j < 4; j++) // integer-count for second endpoint-pair
|
||||
{
|
||||
int low2 = MIN(i, j);
|
||||
int high2 = MAX(i, j);
|
||||
if ((high2 - low2) > 1)
|
||||
continue;
|
||||
|
||||
int intcnt = i + j;
|
||||
float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j], 1e10f);
|
||||
if (errorterm <= combined_best_error[quant][intcnt])
|
||||
{
|
||||
combined_best_error[quant][intcnt] = errorterm;
|
||||
formats_of_choice[quant][intcnt][0] = format_of_choice[0][quant][i];
|
||||
formats_of_choice[quant][intcnt][1] = format_of_choice[1][quant][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// for 2 partitions, find the best combination (two formats + a quantization level) for a given bitcount
|
||||
|
||||
static void two_partitions_find_best_combination_for_bitcount(float combined_best_error[21][7],
|
||||
int formats_of_choice[21][7][2],
|
||||
int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
|
||||
{
|
||||
int i;
|
||||
|
||||
int best_integer_count = 0;
|
||||
float best_integer_count_error = 1e20f;
|
||||
int integer_count;
|
||||
|
||||
for (integer_count = 2; integer_count <= 8; integer_count++)
|
||||
{
|
||||
// compute the quantization level for a given number of integers and a given number of bits.
|
||||
int quantization_level = quantization_mode_table[integer_count][bits_available];
|
||||
if (quantization_level == -1)
|
||||
break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
|
||||
float integer_count_error = combined_best_error[quantization_level][integer_count - 2];
|
||||
if (integer_count_error < best_integer_count_error)
|
||||
{
|
||||
best_integer_count_error = integer_count_error;
|
||||
best_integer_count = integer_count;
|
||||
}
|
||||
}
|
||||
|
||||
int ql = quantization_mode_table[best_integer_count][bits_available];
|
||||
int ql_mod = quantization_mode_table[best_integer_count][bits_available + 2];
|
||||
|
||||
*best_quantization_level = ql;
|
||||
*best_quantization_level_mod = ql_mod;
|
||||
*error_of_best_combination = best_integer_count_error;
|
||||
if (ql >= 0)
|
||||
{
|
||||
for (i = 0; i < 2; i++)
|
||||
best_formats[i] = formats_of_choice[ql][best_integer_count - 2][i];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 2; i++)
|
||||
best_formats[i] = FMT_LUMINANCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// for 3 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
|
||||
|
||||
static void three_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[3][21][4], // indexed by (partition, quant-level, integer-count)
|
||||
int format_of_choice[3][21][4], float combined_best_error[21][10], int formats_of_choice[21][10][3])
|
||||
{
|
||||
int i, j, k;
|
||||
|
||||
for (i = 0; i < 21; i++)
|
||||
for (j = 0; j < 10; j++)
|
||||
combined_best_error[i][j] = 1e30f;
|
||||
|
||||
int quant;
|
||||
for (quant = 5; quant < 21; quant++)
|
||||
{
|
||||
for (i = 0; i < 4; i++) // integer-count for first endpoint-pair
|
||||
{
|
||||
for (j = 0; j < 4; j++) // integer-count for second endpoint-pair
|
||||
{
|
||||
int low2 = MIN(i, j);
|
||||
int high2 = MAX(i, j);
|
||||
if ((high2 - low2) > 1)
|
||||
continue;
|
||||
for (k = 0; k < 4; k++) // integer-count for third endpoint-pair
|
||||
{
|
||||
int low3 = MIN(k, low2);
|
||||
int high3 = MAX(k, high2);
|
||||
if ((high3 - low3) > 1)
|
||||
continue;
|
||||
|
||||
int intcnt = i + j + k;
|
||||
float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f);
|
||||
if (errorterm <= combined_best_error[quant][intcnt])
|
||||
{
|
||||
combined_best_error[quant][intcnt] = errorterm;
|
||||
formats_of_choice[quant][intcnt][0] = format_of_choice[0][quant][i];
|
||||
formats_of_choice[quant][intcnt][1] = format_of_choice[1][quant][j];
|
||||
formats_of_choice[quant][intcnt][2] = format_of_choice[2][quant][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// for 3 partitions, find the best combination (three formats + a quantization level) for a given bitcount
|
||||
|
||||
static void three_partitions_find_best_combination_for_bitcount(float combined_best_error[21][10],
|
||||
int formats_of_choice[21][10][3],
|
||||
int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
|
||||
{
|
||||
int i;
|
||||
|
||||
int best_integer_count = 0;
|
||||
float best_integer_count_error = 1e20f;
|
||||
int integer_count;
|
||||
|
||||
for (integer_count = 3; integer_count <= 9; integer_count++)
|
||||
{
|
||||
// compute the quantization level for a given number of integers and a given number of bits.
|
||||
int quantization_level = quantization_mode_table[integer_count][bits_available];
|
||||
if (quantization_level == -1)
|
||||
break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
|
||||
float integer_count_error = combined_best_error[quantization_level][integer_count - 3];
|
||||
if (integer_count_error < best_integer_count_error)
|
||||
{
|
||||
best_integer_count_error = integer_count_error;
|
||||
best_integer_count = integer_count;
|
||||
}
|
||||
}
|
||||
|
||||
int ql = quantization_mode_table[best_integer_count][bits_available];
|
||||
int ql_mod = quantization_mode_table[best_integer_count][bits_available + 5];
|
||||
|
||||
*best_quantization_level = ql;
|
||||
*best_quantization_level_mod = ql_mod;
|
||||
*error_of_best_combination = best_integer_count_error;
|
||||
if (ql >= 0)
|
||||
{
|
||||
for (i = 0; i < 3; i++)
|
||||
best_formats[i] = formats_of_choice[ql][best_integer_count - 3][i];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 3; i++)
|
||||
best_formats[i] = FMT_LUMINANCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// for 4 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
|
||||
|
||||
static void four_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[4][21][4], // indexed by (partition, quant-level, integer-count)
|
||||
int format_of_choice[4][21][4], float combined_best_error[21][13], int formats_of_choice[21][13][4])
|
||||
{
|
||||
int i, j, k, l;
|
||||
|
||||
for (i = 0; i < 21; i++)
|
||||
for (j = 0; j < 13; j++)
|
||||
combined_best_error[i][j] = 1e30f;
|
||||
|
||||
int quant;
|
||||
for (quant = 5; quant < 21; quant++)
|
||||
{
|
||||
for (i = 0; i < 4; i++) // integer-count for first endpoint-pair
|
||||
{
|
||||
for (j = 0; j < 4; j++) // integer-count for second endpoint-pair
|
||||
{
|
||||
int low2 = MIN(i, j);
|
||||
int high2 = MAX(i, j);
|
||||
if ((high2 - low2) > 1)
|
||||
continue;
|
||||
for (k = 0; k < 4; k++) // integer-count for third endpoint-pair
|
||||
{
|
||||
int low3 = MIN(k, low2);
|
||||
int high3 = MAX(k, high2);
|
||||
if ((high3 - low3) > 1)
|
||||
continue;
|
||||
for (l = 0; l < 4; l++) // integer-count for fourth endpoint-pair
|
||||
{
|
||||
int low4 = MIN(l, low3);
|
||||
int high4 = MAX(l, high3);
|
||||
if ((high4 - low4) > 1)
|
||||
continue;
|
||||
|
||||
int intcnt = i + j + k + l;
|
||||
float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f);
|
||||
if (errorterm <= combined_best_error[quant][intcnt])
|
||||
{
|
||||
combined_best_error[quant][intcnt] = errorterm;
|
||||
formats_of_choice[quant][intcnt][0] = format_of_choice[0][quant][i];
|
||||
formats_of_choice[quant][intcnt][1] = format_of_choice[1][quant][j];
|
||||
formats_of_choice[quant][intcnt][2] = format_of_choice[2][quant][k];
|
||||
formats_of_choice[quant][intcnt][3] = format_of_choice[3][quant][l];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// for 4 partitions, find the best combination (four formats + a quantization level) for a given bitcount
|
||||
|
||||
static void four_partitions_find_best_combination_for_bitcount(float combined_best_error[21][13],
|
||||
int formats_of_choice[21][13][4],
|
||||
int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
|
||||
{
|
||||
int i;
|
||||
int best_integer_count = 0;
|
||||
float best_integer_count_error = 1e20f;
|
||||
int integer_count;
|
||||
|
||||
for (integer_count = 4; integer_count <= 9; integer_count++)
|
||||
{
|
||||
// compute the quantization level for a given number of integers and a given number of bits.
|
||||
int quantization_level = quantization_mode_table[integer_count][bits_available];
|
||||
if (quantization_level == -1)
|
||||
break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
|
||||
float integer_count_error = combined_best_error[quantization_level][integer_count - 4];
|
||||
if (integer_count_error < best_integer_count_error)
|
||||
{
|
||||
best_integer_count_error = integer_count_error;
|
||||
best_integer_count = integer_count;
|
||||
}
|
||||
}
|
||||
|
||||
int ql = quantization_mode_table[best_integer_count][bits_available];
|
||||
int ql_mod = quantization_mode_table[best_integer_count][bits_available + 8];
|
||||
|
||||
*best_quantization_level = ql;
|
||||
*best_quantization_level_mod = ql_mod;
|
||||
*error_of_best_combination = best_integer_count_error;
|
||||
if (ql >= 0)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
best_formats[i] = formats_of_choice[ql][best_integer_count - 4][i];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
best_formats[i] = FMT_LUMINANCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
The determine_optimal_set_of_endpoint_formats_to_use() function.
|
||||
|
||||
It identifies, for each mode, which set of color endpoint encodings
|
||||
produces the best overall result. It then reports back which 4 modes
|
||||
look best, along with the ideal color encoding combination for each.
|
||||
|
||||
It takes as input:
|
||||
a partitioning an imageblock,
|
||||
a set of color endpoints.
|
||||
for each mode, the number of bits available for color encoding and the error incurred by quantization.
|
||||
in case of 2 plane of weights, a specifier for which color component to use for the second plane of weights.
|
||||
|
||||
It delivers as output for each of the 4 selected modes:
|
||||
format specifier
|
||||
for each partition
|
||||
quantization level to use
|
||||
modified quantization level to use
|
||||
(when all format specifiers are equal)
|
||||
*/
|
||||
|
||||
void determine_optimal_set_of_endpoint_formats_to_use(int xdim, int ydim, int zdim,
|
||||
const partition_info * pt, const imageblock * blk, const error_weight_block * ewb,
|
||||
const endpoints * ep,
|
||||
int separate_component, // separate color component for 2-plane mode; -1 for single-plane mode
|
||||
// bitcounts and errors computed for the various quantization methods
|
||||
const int *qwt_bitcounts, const float *qwt_errors,
|
||||
// output data
|
||||
int partition_format_specifiers[4][4], int quantized_weight[4],
|
||||
int quantization_level[4], int quantization_level_mod[4])
|
||||
{
|
||||
int i, j;
|
||||
int partition_count = pt->partition_count;
|
||||
|
||||
int encode_hdr_rgb = blk->rgb_lns[0];
|
||||
int encode_hdr_alpha = blk->alpha_lns[0];
|
||||
|
||||
|
||||
// call a helper function to compute the errors that result from various
|
||||
// encoding choices (such as using luminance instead of RGB, discarding Alpha,
|
||||
// using RGB-scale in place of two separate RGB endpoints and so on)
|
||||
encoding_choice_errors eci[4];
|
||||
compute_encoding_choice_errors(xdim, ydim, zdim, blk, pt, ewb, separate_component, eci);
|
||||
|
||||
// for each partition, compute the error weights to apply for that partition.
|
||||
float4 error_weightings[4];
|
||||
float4 dummied_color_scalefactors[4]; // only used to receive data
|
||||
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, dummied_color_scalefactors);
|
||||
|
||||
|
||||
float best_error[4][21][4];
|
||||
int format_of_choice[4][21][4];
|
||||
for (i = 0; i < partition_count; i++)
|
||||
compute_color_error_for_every_integer_count_and_quantization_level(encode_hdr_rgb, encode_hdr_alpha, i, pt, &(eci[i]), ep, error_weightings, best_error[i], format_of_choice[i]);
|
||||
|
||||
float errors_of_best_combination[MAX_WEIGHT_MODES];
|
||||
int best_quantization_levels[MAX_WEIGHT_MODES];
|
||||
int best_quantization_levels_mod[MAX_WEIGHT_MODES];
|
||||
int best_ep_formats[MAX_WEIGHT_MODES][4];
|
||||
|
||||
// code for the case where the block contains 1 partition
|
||||
if (partition_count == 1)
|
||||
{
|
||||
int best_quantization_level;
|
||||
int best_format;
|
||||
float error_of_best_combination;
|
||||
for (i = 0; i < MAX_WEIGHT_MODES; i++)
|
||||
{
|
||||
if (qwt_errors[i] >= 1e29f)
|
||||
{
|
||||
errors_of_best_combination[i] = 1e30f;
|
||||
continue;
|
||||
}
|
||||
|
||||
one_partition_find_best_combination_for_bitcount(best_error[0], format_of_choice[0], qwt_bitcounts[i], &best_quantization_level, &best_format, &error_of_best_combination);
|
||||
error_of_best_combination += qwt_errors[i];
|
||||
|
||||
errors_of_best_combination[i] = error_of_best_combination;
|
||||
best_quantization_levels[i] = best_quantization_level;
|
||||
best_quantization_levels_mod[i] = best_quantization_level;
|
||||
best_ep_formats[i][0] = best_format;
|
||||
}
|
||||
}
|
||||
|
||||
// code for the case where the block contains 2 partitions
|
||||
else if (partition_count == 2)
|
||||
{
|
||||
int best_quantization_level;
|
||||
int best_quantization_level_mod;
|
||||
int best_formats[2];
|
||||
float error_of_best_combination;
|
||||
|
||||
float combined_best_error[21][7];
|
||||
int formats_of_choice[21][7][2];
|
||||
|
||||
two_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
|
||||
|
||||
|
||||
for (i = 0; i < MAX_WEIGHT_MODES; i++)
|
||||
{
|
||||
if (qwt_errors[i] >= 1e29f)
|
||||
{
|
||||
errors_of_best_combination[i] = 1e30f;
|
||||
continue;
|
||||
}
|
||||
|
||||
two_partitions_find_best_combination_for_bitcount(combined_best_error, formats_of_choice, qwt_bitcounts[i],
|
||||
&best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
|
||||
|
||||
error_of_best_combination += qwt_errors[i];
|
||||
|
||||
errors_of_best_combination[i] = error_of_best_combination;
|
||||
best_quantization_levels[i] = best_quantization_level;
|
||||
best_quantization_levels_mod[i] = best_quantization_level_mod;
|
||||
best_ep_formats[i][0] = best_formats[0];
|
||||
best_ep_formats[i][1] = best_formats[1];
|
||||
}
|
||||
}
|
||||
|
||||
// code for the case where the block contains 3 partitions
|
||||
else if (partition_count == 3)
|
||||
{
|
||||
int best_quantization_level;
|
||||
int best_quantization_level_mod;
|
||||
int best_formats[3];
|
||||
float error_of_best_combination;
|
||||
|
||||
float combined_best_error[21][10];
|
||||
int formats_of_choice[21][10][3];
|
||||
|
||||
three_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
|
||||
|
||||
for (i = 0; i < MAX_WEIGHT_MODES; i++)
|
||||
{
|
||||
if (qwt_errors[i] >= 1e29f)
|
||||
{
|
||||
errors_of_best_combination[i] = 1e30f;
|
||||
continue;
|
||||
}
|
||||
|
||||
three_partitions_find_best_combination_for_bitcount(combined_best_error,
|
||||
formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
|
||||
error_of_best_combination += qwt_errors[i];
|
||||
|
||||
errors_of_best_combination[i] = error_of_best_combination;
|
||||
best_quantization_levels[i] = best_quantization_level;
|
||||
best_quantization_levels_mod[i] = best_quantization_level_mod;
|
||||
best_ep_formats[i][0] = best_formats[0];
|
||||
best_ep_formats[i][1] = best_formats[1];
|
||||
best_ep_formats[i][2] = best_formats[2];
|
||||
}
|
||||
}
|
||||
|
||||
// code for the case where the block contains 4 partitions
|
||||
else if (partition_count == 4)
|
||||
{
|
||||
int best_quantization_level;
|
||||
int best_quantization_level_mod;
|
||||
int best_formats[4];
|
||||
float error_of_best_combination;
|
||||
|
||||
float combined_best_error[21][13];
|
||||
int formats_of_choice[21][13][4];
|
||||
|
||||
four_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
|
||||
|
||||
for (i = 0; i < MAX_WEIGHT_MODES; i++)
|
||||
{
|
||||
if (qwt_errors[i] >= 1e29f)
|
||||
{
|
||||
errors_of_best_combination[i] = 1e30f;
|
||||
continue;
|
||||
}
|
||||
four_partitions_find_best_combination_for_bitcount(combined_best_error,
|
||||
formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
|
||||
error_of_best_combination += qwt_errors[i];
|
||||
|
||||
errors_of_best_combination[i] = error_of_best_combination;
|
||||
best_quantization_levels[i] = best_quantization_level;
|
||||
best_quantization_levels_mod[i] = best_quantization_level_mod;
|
||||
best_ep_formats[i][0] = best_formats[0];
|
||||
best_ep_formats[i][1] = best_formats[1];
|
||||
best_ep_formats[i][2] = best_formats[2];
|
||||
best_ep_formats[i][3] = best_formats[3];
|
||||
}
|
||||
}
|
||||
|
||||
// finally, go through the results and pick the 4 best-looking modes.
|
||||
|
||||
int best_error_weights[4];
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
float best_ep_error = 1e30f;
|
||||
int best_error_index = -1;
|
||||
for (j = 0; j < MAX_WEIGHT_MODES; j++)
|
||||
{
|
||||
if (errors_of_best_combination[j] < best_ep_error && best_quantization_levels[j] >= 5)
|
||||
{
|
||||
best_ep_error = errors_of_best_combination[j];
|
||||
best_error_index = j;
|
||||
}
|
||||
}
|
||||
best_error_weights[i] = best_error_index;
|
||||
|
||||
if(best_error_index >= 0)
|
||||
{
|
||||
errors_of_best_combination[best_error_index] = 1e30f;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
quantized_weight[i] = best_error_weights[i];
|
||||
if (quantized_weight[i] >= 0)
|
||||
{
|
||||
quantization_level[i] = best_quantization_levels[best_error_weights[i]];
|
||||
quantization_level_mod[i] = best_quantization_levels_mod[best_error_weights[i]];
|
||||
for (j = 0; j < partition_count; j++)
|
||||
{
|
||||
partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
558
3rdparty/astc/astc_quantization.cpp
vendored
Normal file
558
3rdparty/astc/astc_quantization.cpp
vendored
Normal file
@@ -0,0 +1,558 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Functions and data table related to data quantization in ASTC.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
// color_quantization_tables[level][v]: lookup table that maps an 8-bit value
// v (0..255) to a quantized index, with one 256-entry row for each of the 21
// quantization levels.
// Judging by the data, the index stored for v is the position of the closest
// value in color_unquantization_tables[level]; because some of those rows are
// not stored in ascending numeric order, the indices in the matching rows here
// are not monotonic in v either.
// NOTE(review): the row order presumably follows the quantization_method enum
// (build_quantization_mode_table casts 0..20 to that type) -- confirm against
// astc_codec_internals.h.
const uint8_t color_quantization_tables[21][256] = {
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
||||
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
|
||||
23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
|
||||
29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
|
||||
16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
|
||||
2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
|
||||
18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
|
||||
4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
|
||||
20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
|
||||
6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
|
||||
22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
|
||||
39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
|
||||
23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
|
||||
7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
|
||||
21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
|
||||
5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
|
||||
19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
|
||||
3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
|
||||
17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
|
||||
2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
|
||||
4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
|
||||
6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
|
||||
8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
|
||||
10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
|
||||
12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
|
||||
14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
|
||||
47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
|
||||
15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
|
||||
13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
|
||||
11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
|
||||
9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
|
||||
7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
|
||||
5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
|
||||
3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
|
||||
4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
|
||||
8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
|
||||
12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
|
||||
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
||||
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
||||
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
||||
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
||||
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
||||
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
||||
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
||||
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
||||
47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
|
||||
51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
|
||||
55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
|
||||
59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
|
||||
},
|
||||
{
|
||||
0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
|
||||
2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
|
||||
4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
|
||||
6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
|
||||
8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
|
||||
10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
|
||||
12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
|
||||
14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
|
||||
79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
|
||||
15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
|
||||
13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
|
||||
11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
|
||||
9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
|
||||
7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
|
||||
5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
|
||||
3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
|
||||
4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
|
||||
8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
|
||||
12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
|
||||
16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
|
||||
20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
|
||||
24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
|
||||
28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
|
||||
95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
|
||||
91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
|
||||
87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
|
||||
83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
|
||||
17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
|
||||
13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
|
||||
9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
|
||||
5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
|
||||
8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
|
||||
16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
|
||||
24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
|
||||
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
|
||||
40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
|
||||
48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
|
||||
56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
|
||||
64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
|
||||
72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
|
||||
80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
|
||||
88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
|
||||
96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
|
||||
104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
|
||||
112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
|
||||
120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
|
||||
},
|
||||
{
|
||||
0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
|
||||
4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
|
||||
8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
|
||||
12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
|
||||
16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
|
||||
20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
|
||||
24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
|
||||
28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
|
||||
159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
|
||||
155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
|
||||
151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
|
||||
147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
|
||||
143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
|
||||
139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
|
||||
135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
|
||||
131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
|
||||
},
|
||||
{
|
||||
0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
|
||||
8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
|
||||
16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
|
||||
24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
|
||||
32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
|
||||
40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
|
||||
48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
|
||||
56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
|
||||
191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
|
||||
183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
|
||||
175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
|
||||
167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
|
||||
159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
|
||||
151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
|
||||
143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
|
||||
135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
|
||||
},
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
|
||||
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
|
||||
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
|
||||
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
|
||||
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
|
||||
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
|
||||
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
|
||||
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
// color_unquantization_tables[level][index]: 8-bit value (0..255) represented
// by a quantized index, with one row for each of the 21 quantization levels.
// Each row lists only as many initializers as that level has distinct values
// (2 for the first row, growing to 256 for the last); the remaining elements
// of the 256-wide row are zero-initialized by aggregate initialization and are
// presumably never indexed -- TODO confirm against callers.
// Several rows are deliberately not in ascending order (e.g. "0, 255, 51,
// 204, ..."), so an index is not simply proportional to the value it decodes to.
const uint8_t color_unquantization_tables[21][256] = {
|
||||
{
|
||||
0, 255,
|
||||
},
|
||||
{
|
||||
0, 128, 255,
|
||||
},
|
||||
{
|
||||
0, 85, 170, 255,
|
||||
},
|
||||
{
|
||||
0, 64, 128, 192, 255,
|
||||
},
|
||||
{
|
||||
0, 255, 51, 204, 102, 153,
|
||||
},
|
||||
{
|
||||
0, 36, 73, 109, 146, 182, 219, 255,
|
||||
},
|
||||
{
|
||||
0, 255, 28, 227, 56, 199, 84, 171, 113, 142,
|
||||
},
|
||||
{
|
||||
0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139,
|
||||
},
|
||||
{
|
||||
0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255,
|
||||
},
|
||||
{
|
||||
0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
|
||||
54, 201, 121, 134,
|
||||
},
|
||||
{
|
||||
0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
|
||||
22, 233, 55, 200, 88, 167, 121, 134,
|
||||
},
|
||||
{
|
||||
0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
|
||||
132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255,
|
||||
},
|
||||
{
|
||||
0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
|
||||
13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
|
||||
26, 229, 58, 197, 91, 164, 123, 132,
|
||||
},
|
||||
{
|
||||
0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
|
||||
5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
|
||||
11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131,
|
||||
},
|
||||
{
|
||||
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
|
||||
65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
|
||||
130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
|
||||
195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255,
|
||||
},
|
||||
{
|
||||
0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
|
||||
3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
|
||||
6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
|
||||
9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
|
||||
13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130,
|
||||
},
|
||||
{
|
||||
0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
|
||||
64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
|
||||
2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
|
||||
67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
|
||||
5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
|
||||
70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129,
|
||||
},
|
||||
{
|
||||
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
|
||||
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
|
||||
64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
|
||||
96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
|
||||
129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
|
||||
161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
|
||||
193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
|
||||
225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255,
|
||||
},
|
||||
{
|
||||
0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
|
||||
64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
|
||||
1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
|
||||
65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
|
||||
3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
|
||||
67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
|
||||
4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
|
||||
68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
|
||||
6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
|
||||
70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129,
|
||||
},
|
||||
{
|
||||
0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
|
||||
32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
|
||||
64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
|
||||
96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
|
||||
1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
|
||||
33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
|
||||
65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
|
||||
97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
|
||||
2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
|
||||
34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
|
||||
66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
|
||||
98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129,
|
||||
},
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
|
||||
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
|
||||
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
|
||||
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
|
||||
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
|
||||
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
|
||||
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
|
||||
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
|
||||
},
|
||||
};
|
||||
|
||||
// quantization_mode_table[integercount/2][bits] gives
|
||||
// us the quantization level for a given integer count and number of bits that
|
||||
// the integers may fit into. This is needed for color decoding,
|
||||
// and for the color encoding.
// The table is empty until build_quantization_mode_table() has been called.
// First index: integer count divided by two (rows 1..16 are filled; row 0 is
// never written with a level and stays at -1). Second index: bit budget,
// 0..127. Value: the highest quantization level index (0..20) whose encoded
// size does not exceed the budget, or -1 when no level fits.
|
||||
int quantization_mode_table[17][128];
|
||||
|
||||
void build_quantization_mode_table(void)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i <= 16; i++)
|
||||
for (j = 0; j < 128; j++)
|
||||
quantization_mode_table[i][j] = -1;
|
||||
|
||||
for (i = 0; i < 21; i++)
|
||||
for (j = 1; j <= 16; j++)
|
||||
{
|
||||
int p = compute_ise_bitcount(2 * j, (quantization_method) i);
|
||||
if (p < 128)
|
||||
quantization_mode_table[j][p] = i;
|
||||
}
|
||||
for (i = 0; i <= 16; i++)
|
||||
{
|
||||
int largest_value_so_far = -1;
|
||||
for (j = 0; j < 128; j++)
|
||||
{
|
||||
if (quantization_mode_table[i][j] > largest_value_so_far)
|
||||
largest_value_so_far = quantization_mode_table[i][j];
|
||||
else
|
||||
quantization_mode_table[i][j] = largest_value_so_far;
|
||||
}
|
||||
}
|
||||
}
|
||||
431
3rdparty/astc/astc_symbolic_physical.cpp
vendored
Normal file
431
3rdparty/astc/astc_symbolic_physical.cpp
vendored
Normal file
@@ -0,0 +1,431 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Functions to convert a compressed block between the symbolic and
|
||||
* the physical representation.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
// Write the low `bitcount` bits of `value` into the byte buffer `ptr`, starting
// at absolute bit position `bitoffset` (bit 0 is the least significant bit of
// ptr[0]); bits outside the field are preserved.
//
// The field may start at any bit and may spill into the following byte, so the
// supported range is (bitoffset & 7) + bitcount <= 16.
//
// The second byte is touched only when the field actually reaches into it, so
// a field that ends inside the last byte of a buffer no longer causes a
// read-modify-write one byte past the end.
static inline void write_bits(int value, int bitcount, int bitoffset, uint8_t * ptr)
{
	int mask = (1 << bitcount) - 1;
	value &= mask;
	ptr += bitoffset >> 3;		// advance to the byte that holds the first bit
	bitoffset &= 7;				// bit position inside that byte
	value <<= bitoffset;
	mask <<= bitoffset;

	// Low byte: clear the field, then merge in the new bits.
	ptr[0] = (uint8_t) ((ptr[0] & ~mask) | value);

	// High byte: only when the field straddles the byte boundary. The mask is
	// kept non-negative and complemented after shifting, which avoids a right
	// shift of a negative value.
	if (bitoffset + bitcount > 8)
	{
		ptr[1] = (uint8_t) ((ptr[1] & ~(mask >> 8)) | (value >> 8));
	}
}
|
||||
|
||||
|
||||
// Read a `bitcount`-bit field from the byte buffer `ptr`, starting at absolute
// bit position `bitoffset` (bit 0 is the least significant bit of ptr[0]), and
// return it right-aligned.
//
// The field may start at any bit and may spill into the following byte, so the
// supported range is (bitoffset & 7) + bitcount <= 16.
//
// The second byte is fetched only when the field actually reaches into it, so
// a field that ends inside the last byte of a buffer no longer causes a read
// one byte past the end.
static inline int read_bits(int bitcount, int bitoffset, const uint8_t * ptr)
{
	int mask = (1 << bitcount) - 1;
	ptr += bitoffset >> 3;		// advance to the byte that holds the first bit
	bitoffset &= 7;				// bit position inside that byte

	int value = ptr[0];
	if (bitoffset + bitcount > 8)
	{
		// Field straddles the byte boundary: bring in the next byte as well.
		value |= ptr[1] << 8;
	}

	value >>= bitoffset;
	value &= mask;
	return value;
}
|
||||
|
||||
|
||||
// Reverse the order of the eight low bits of `p` (bit 0 <-> bit 7,
// bit 1 <-> bit 6, ...). Any bits above bit 7 are ignored, so the result is
// always in the range 0..255.
int bitrev8(int p)
{
	// Only the low byte takes part in the reversal.
	const unsigned low_byte = (unsigned) p & 0xFFu;

	// Swap neighbouring bits, then neighbouring bit pairs, then the nibbles.
	const unsigned bits_swapped = ((low_byte & 0x55u) << 1) | ((low_byte >> 1) & 0x55u);
	const unsigned pairs_swapped = ((bits_swapped & 0x33u) << 2) | ((bits_swapped >> 2) & 0x33u);
	const unsigned nibbles_swapped = ((pairs_swapped & 0x0Fu) << 4) | (pairs_swapped >> 4);

	return (int) nibbles_swapped;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
	Pack a symbolic compressed block into its 16-byte physical representation.

	xdim, ydim, zdim select the block-size descriptor used to look up the
	weight layout of sc->block_mode. A block_mode of -2 or -1 denotes a
	constant-color block (UNORM16 or FP16 respectively); any other value is
	used as an index into the descriptor's block-mode table.
*/
physical_compressed_block symbolic_to_physical(int xdim, int ydim, int zdim, const symbolic_compressed_block * sc)
{
	physical_compressed_block res;
	int part, k;

	// constant-color blocks. Each one is encoded on its own; there is currently
	// no attempt to coalesce them into larger void-extents.
	if (sc->block_mode == -2 || sc->block_mode == -1)
	{
		// the UNORM16 and FP16 headers differ only in their second byte.
		res.data[0] = 0xFC;
		res.data[1] = (sc->block_mode == -2) ? 0xFD : 0xFF;
		for (k = 2; k < 8; k++)
			res.data[k] = 0xFF;

		// four 16-bit color values, least-significant byte first.
		for (k = 0; k < 4; k++)
		{
			res.data[8 + 2 * k] = sc->constant_color[k] & 0xFF;
			res.data[9 + 2 * k] = (sc->constant_color[k] >> 8) & 0xFF;
		}
		return res;
	}

	const int partition_count = sc->partition_count;

	// look up the weight layout that belongs to this block mode.
	const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
	const decimation_table *const *dec_tables = bsd->decimation_tables;

	const int weight_count = dec_tables[bsd->block_modes[sc->block_mode].decimation_mode]->num_weights;
	const int weight_quant = bsd->block_modes[sc->block_mode].quantization_mode;
	const int is_dual_plane = bsd->block_modes[sc->block_mode].is_dual_plane;

	const int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
	const int bits_for_weights = compute_ise_bitcount(real_weight_count, (quantization_method) weight_quant);

	// the weights are encoded as an ordinary integer sequence into a scratch
	// buffer; the buffer is then copied into the block back-to-front with the
	// bits of every byte reversed.
	uint8_t weightbuf[16] = { 0 };

	if (!is_dual_plane)
	{
		encode_ise(weight_quant, weight_count, sc->plane1_weights, weightbuf, 0);
	}
	else
	{
		// with two planes the weights of both planes are interleaved.
		uint8_t interleaved[64];
		for (k = 0; k < weight_count; k++)
		{
			interleaved[2 * k] = sc->plane1_weights[k];
			interleaved[2 * k + 1] = sc->plane2_weights[k];
		}
		encode_ise(weight_quant, real_weight_count, interleaved, weightbuf, 0);
	}

	for (k = 0; k < 16; k++)
		res.data[15 - k] = bitrev8(weightbuf[k]);

	// header: block mode in bits 0..10, partition count minus one in bits 11..12.
	write_bits(sc->block_mode, 11, 0, res.data);
	write_bits(partition_count - 1, 2, 11, res.data);

	// first bit position occupied by weight data (or by the extra
	// endpoint-type bits that sit directly below the weights).
	int below_weights_pos = 128 - bits_for_weights;

	if (partition_count <= 1)
	{
		// a single partition stores its 4-bit color endpoint format directly.
		write_bits(sc->color_formats[0], 4, 13, res.data);
	}
	else
	{
		// partition index: low 6 bits at bit 13, the remainder at bit 19.
		write_bits(sc->partition_index, 6, 13, res.data);
		write_bits(sc->partition_index >> 6, PARTITION_BITS - 6, 19, res.data);

		if (sc->color_formats_matched)
		{
			// all partitions share one format; the two low bits stay zero.
			write_bits(sc->color_formats[0] << 2, 6, 13 + PARTITION_BITS, res.data);
		}
		else
		{
			// find the lowest endpoint class among the partitions; every
			// partition is then stored as that class or the one above it.
			int low_class = 4;
			for (part = 0; part < partition_count; part++)
			{
				const int cls = sc->color_formats[part] >> 2;
				if (cls < low_class)
					low_class = cls;
			}
			if (low_class == 3)
				low_class = 2;

			// layout of encoded_type: bits 0..1 hold low_class + 1, followed by
			// one class-select bit per partition, followed by the two low
			// format bits of every partition.
			int encoded_type = low_class + 1;
			for (part = 0; part < partition_count; part++)
			{
				const int fmt = sc->color_formats[part];
				encoded_type |= ((fmt >> 2) - low_class) << (2 + part);
				encoded_type |= (fmt & 3) << (2 + partition_count + 2 * part);
			}

			// the low 6 bits follow the partition index; the rest is placed
			// directly below the weight data.
			const int highpart_size = (3 * partition_count) - 4;
			write_bits(encoded_type & 0x3F, 6, 13 + PARTITION_BITS, res.data);
			write_bits(encoded_type >> 6, highpart_size, 128 - bits_for_weights - highpart_size, res.data);

			below_weights_pos -= highpart_size;
		}
	}

	// in dual-plane mode, store which color component the second weight plane applies to.
	if (is_dual_plane)
		write_bits(sc->plane2_color_component, 2, below_weights_pos - 2, res.data);

	// finally the color endpoint values: gather them from all partitions ...
	uint8_t values_to_encode[32];
	int value_count = 0;
	for (part = 0; part < sc->partition_count; part++)
	{
		const int vals = 2 * (sc->color_formats[part] >> 2) + 2;
		for (k = 0; k < vals; k++)
			values_to_encode[value_count + k] = sc->color_values[part][k];
		value_count += vals;
	}

	// ... and encode them as one integer sequence right after the header fields.
	encode_ise(sc->color_quantization_level, value_count, values_to_encode, res.data, (sc->partition_count == 1 ? 17 : 19 + PARTITION_BITS));

	return res;
}
|
||||
|
||||
|
||||
void physical_to_symbolic(int xdim, int ydim, int zdim, physical_compressed_block pb, symbolic_compressed_block * res)
|
||||
{
|
||||
uint8_t bswapped[16];
|
||||
int i, j;
|
||||
|
||||
res->error_block = 0;
|
||||
|
||||
// get hold of the block-size descriptor and the decimation tables.
|
||||
const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
|
||||
const decimation_table *const *ixtab2 = bsd->decimation_tables;
|
||||
|
||||
// extract header fields
|
||||
int block_mode = read_bits(11, 0, pb.data);
|
||||
|
||||
|
||||
if ((block_mode & 0x1FF) == 0x1FC)
|
||||
{
|
||||
// void-extent block!
|
||||
|
||||
// check what format the data has
|
||||
if (block_mode & 0x200)
|
||||
res->block_mode = -1; // floating-point
|
||||
else
|
||||
res->block_mode = -2; // unorm16.
|
||||
|
||||
res->partition_count = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
res->constant_color[i] = pb.data[2 * i + 8] | (pb.data[2 * i + 9] << 8);
|
||||
}
|
||||
|
||||
// additionally, check that the void-extent
|
||||
if (zdim == 1)
|
||||
{
|
||||
// 2D void-extent
|
||||
int rsvbits = read_bits(2, 10, pb.data);
|
||||
if (rsvbits != 3)
|
||||
res->error_block = 1;
|
||||
|
||||
int vx_low_s = read_bits(8, 12, pb.data) | (read_bits(5, 12 + 8, pb.data) << 8);
|
||||
int vx_high_s = read_bits(8, 25, pb.data) | (read_bits(5, 25 + 8, pb.data) << 8);
|
||||
int vx_low_t = read_bits(8, 38, pb.data) | (read_bits(5, 38 + 8, pb.data) << 8);
|
||||
int vx_high_t = read_bits(8, 51, pb.data) | (read_bits(5, 51 + 8, pb.data) << 8);
|
||||
|
||||
int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
|
||||
|
||||
if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
|
||||
res->error_block = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 3D void-extent
|
||||
int vx_low_s = read_bits(9, 10, pb.data);
|
||||
int vx_high_s = read_bits(9, 19, pb.data);
|
||||
int vx_low_t = read_bits(9, 28, pb.data);
|
||||
int vx_high_t = read_bits(9, 37, pb.data);
|
||||
int vx_low_p = read_bits(9, 46, pb.data);
|
||||
int vx_high_p = read_bits(9, 55, pb.data);
|
||||
|
||||
int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && vx_low_t == 0x1FF && vx_high_t == 0x1FF && vx_low_p == 0x1FF && vx_high_p == 0x1FF;
|
||||
|
||||
if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones)
|
||||
res->error_block = 1;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (bsd->block_modes[block_mode].permit_decode == 0)
|
||||
{
|
||||
res->error_block = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
int weight_count = ixtab2[bsd->block_modes[block_mode].decimation_mode]->num_weights;
|
||||
int weight_quantization_method = bsd->block_modes[block_mode].quantization_mode;
|
||||
int is_dual_plane = bsd->block_modes[block_mode].is_dual_plane;
|
||||
|
||||
int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
|
||||
|
||||
int partition_count = read_bits(2, 11, pb.data) + 1;
|
||||
|
||||
res->block_mode = block_mode;
|
||||
res->partition_count = partition_count;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
bswapped[i] = bitrev8(pb.data[15 - i]);
|
||||
|
||||
int bits_for_weights = compute_ise_bitcount(real_weight_count,
|
||||
(quantization_method) weight_quantization_method);
|
||||
|
||||
int below_weights_pos = 128 - bits_for_weights;
|
||||
|
||||
if (is_dual_plane)
|
||||
{
|
||||
uint8_t indices[64];
|
||||
decode_ise(weight_quantization_method, real_weight_count, bswapped, indices, 0);
|
||||
for (i = 0; i < weight_count; i++)
|
||||
{
|
||||
res->plane1_weights[i] = indices[2 * i];
|
||||
res->plane2_weights[i] = indices[2 * i + 1];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
decode_ise(weight_quantization_method, weight_count, bswapped, res->plane1_weights, 0);
|
||||
}
|
||||
|
||||
if (is_dual_plane && partition_count == 4)
|
||||
res->error_block = 1;
|
||||
|
||||
|
||||
|
||||
res->color_formats_matched = 0;
|
||||
|
||||
// then, determine the format of each endpoint pair
|
||||
int color_formats[4];
|
||||
int encoded_type_highpart_size = 0;
|
||||
if (partition_count == 1)
|
||||
{
|
||||
color_formats[0] = read_bits(4, 13, pb.data);
|
||||
res->partition_index = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
encoded_type_highpart_size = (3 * partition_count) - 4;
|
||||
below_weights_pos -= encoded_type_highpart_size;
|
||||
int encoded_type = read_bits(6, 13 + PARTITION_BITS, pb.data) | (read_bits(encoded_type_highpart_size, below_weights_pos, pb.data) << 6);
|
||||
int baseclass = encoded_type & 0x3;
|
||||
if (baseclass == 0)
|
||||
{
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
color_formats[i] = (encoded_type >> 2) & 0xF;
|
||||
}
|
||||
below_weights_pos += encoded_type_highpart_size;
|
||||
res->color_formats_matched = 1;
|
||||
encoded_type_highpart_size = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int bitpos = 2;
|
||||
baseclass--;
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
|
||||
bitpos++;
|
||||
}
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
color_formats[i] |= (encoded_type >> bitpos) & 3;
|
||||
bitpos += 2;
|
||||
}
|
||||
}
|
||||
res->partition_index = read_bits(6, 13, pb.data) | (read_bits(PARTITION_BITS - 6, 19, pb.data) << 6);
|
||||
|
||||
}
|
||||
for (i = 0; i < partition_count; i++)
|
||||
res->color_formats[i] = color_formats[i];
|
||||
|
||||
|
||||
// then, determine the number of integers we need to unpack for the endpoint pairs
|
||||
int color_integer_count = 0;
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
int endpoint_class = color_formats[i] >> 2;
|
||||
color_integer_count += (endpoint_class + 1) * 2;
|
||||
}
|
||||
|
||||
if (color_integer_count > 18)
|
||||
res->error_block = 1;
|
||||
|
||||
// then, determine the color endpoint format to use for these integers
|
||||
static const int color_bits_arr[5] = { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS };
|
||||
int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
|
||||
if (is_dual_plane)
|
||||
color_bits -= 2;
|
||||
if (color_bits < 0)
|
||||
color_bits = 0;
|
||||
|
||||
int color_quantization_level = quantization_mode_table[color_integer_count >> 1][color_bits];
|
||||
res->color_quantization_level = color_quantization_level;
|
||||
if (color_quantization_level < 4)
|
||||
res->error_block = 1;
|
||||
|
||||
|
||||
// then unpack the integer-bits
|
||||
uint8_t values_to_decode[32];
|
||||
decode_ise(color_quantization_level, color_integer_count, pb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS));
|
||||
|
||||
// and distribute them over the endpoint types
|
||||
int valuecount_to_decode = 0;
|
||||
|
||||
for (i = 0; i < partition_count; i++)
|
||||
{
|
||||
int vals = 2 * (color_formats[i] >> 2) + 2;
|
||||
for (j = 0; j < vals; j++)
|
||||
res->color_values[i][j] = values_to_decode[j + valuecount_to_decode];
|
||||
valuecount_to_decode += vals;
|
||||
}
|
||||
|
||||
// get hold of color component for second-plane in the case of dual plane of weights.
|
||||
if (is_dual_plane)
|
||||
res->plane2_color_component = read_bits(2, below_weights_pos - 2, pb.data);
|
||||
|
||||
}
|
||||
598
3rdparty/astc/astc_weight_align.cpp
vendored
Normal file
598
3rdparty/astc/astc_weight_align.cpp
vendored
Normal file
@@ -0,0 +1,598 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Angular-sum algorithm for weight alignment.
|
||||
*
|
||||
* This algorithm works as follows:
|
||||
* * we compute a complex number P as (cos s*i, sin s*i) for each
|
||||
* weight, where i is the input value and s is a scaling factor
|
||||
* based on the spacing between the weights.
|
||||
* * we then add together complex numbers for all the weights.
|
||||
* * we then compute the length and angle of the resulting sum.
|
||||
*
|
||||
* This should produce the following results:
|
||||
* * perfect alignment results in a vector whose length is equal to
|
||||
* the sum of lengths of all inputs
|
||||
* * even distribution results in a vector of length 0.
|
||||
* * all samples identical results in perfect alignment for every
|
||||
* scaling.
|
||||
*
|
||||
* For each scaling factor within a given set, we compute an alignment
|
||||
* factor from 0 to 1. This should then result in some scalings standing
|
||||
* out as having particularly good alignment factors; we can use this to
|
||||
* produce a set of candidate scale/shift values for various quantization
|
||||
* levels; we should then actually try them and see what happens.
|
||||
*
|
||||
* Assuming N quantization steps, the scaling factor becomes s=2*PI*(N-1);
|
||||
* we should probably have about 1 scaling factor for every 1/4
|
||||
* quantization step (perhaps 1/8 for low levels of quantization)
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include <math.h>
|
||||
#include "astc_codec_internals.h"
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
// Candidate steppings tried by the angular-sum search: steps of 1/8 from 1 to 2,
// steps of 1/4 from 2 to 8, and steps of 1/2 from 8 up to 35.5.
static const float angular_steppings[] = {
	1.0, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875,

	2.0, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5, 3.75,
	4.0, 4.25, 4.5, 4.75, 5.0, 5.25, 5.5, 5.75,
	6.0, 6.25, 6.5, 6.75, 7.0, 7.25, 7.5, 7.75,

	8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5,
	12.0, 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5,
	16.0, 16.5, 17.0, 17.5, 18.0, 18.5, 19.0, 19.5,
	20.0, 20.5, 21.0, 21.5, 22.0, 22.5, 23.0, 23.5,
	24.0, 24.5, 25.0, 25.5, 26.0, 26.5, 27.0, 27.5,
	28.0, 28.5, 29.0, 29.5, 30.0, 30.5, 31.0, 31.5,
	32.0, 32.5, 33.0, 33.5, 34.0, 34.5, 35.0, 35.5,
};

// number of entries in angular_steppings[].
#define ANGULAR_STEPS ((int)(sizeof(angular_steppings)/sizeof(angular_steppings[0])))

// per-stepping reciprocal and squared reciprocal; filled in by prepare_angular_tables().
static float stepsizes[ANGULAR_STEPS];
static float stepsizes_sqr[ANGULAR_STEPS];

// for each of the 13 weight quantization levels, the number of leading
// steppings that have to be examined; filled in by prepare_angular_tables().
static int max_angular_steps_needed_for_quant_level[13];

// sine/cosine values are stored for 64 possible weight values; this causes
// slight quality loss compared to using sin() and cos() directly.
#define SINCOS_STEPS 64

// indexed as [weight value][stepping]; filled in by prepare_angular_tables().
static float sin_table[SINCOS_STEPS][ANGULAR_STEPS];
static float cos_table[SINCOS_STEPS][ANGULAR_STEPS];
|
||||
|
||||
void prepare_angular_tables(void)
|
||||
{
|
||||
int i, j;
|
||||
int max_angular_steps_needed_for_quant_steps[40];
|
||||
for (i = 0; i < ANGULAR_STEPS; i++)
|
||||
{
|
||||
stepsizes[i] = 1.0f / angular_steppings[i];
|
||||
stepsizes_sqr[i] = stepsizes[i] * stepsizes[i];
|
||||
|
||||
for (j = 0; j < SINCOS_STEPS; j++)
|
||||
{
|
||||
sin_table[j][i] = static_cast < float >(sin((2.0f * M_PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j));
|
||||
cos_table[j][i] = static_cast < float >(cos((2.0f * M_PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j));
|
||||
}
|
||||
|
||||
int p = static_cast < int >(floor(angular_steppings[i])) + 1;
|
||||
max_angular_steps_needed_for_quant_steps[p] = MIN(i + 1, ANGULAR_STEPS - 1);
|
||||
}
|
||||
|
||||
|
||||
// yes, the next-to-last entry is supposed to have the value 33. This because under
|
||||
// ASTC, the 32-weight mode leaves a double-sized hole in the middle of the
|
||||
// weight space, so we are better off matching 33 weights than 32.
|
||||
static const int steps_of_level[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 };
|
||||
|
||||
for (i = 0; i < 13; i++)
|
||||
max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[steps_of_level[i]];
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Overlay of a 32-bit float with signed and unsigned 32-bit integers.
// The functions below store a float through `f` and then read its bit
// pattern through `u` in order to pick the low bits of a rounded value
// without an explicit floor() or float->int conversion.
union if32
|
||||
{
|
||||
float f;
|
||||
int32_t s;
|
||||
uint32_t u;
|
||||
};
|
||||
|
||||
|
||||
// function to compute angular sums; then, from the
|
||||
// angular sums, compute alignment factor and offset.
|
||||
|
||||
/* static inline */
|
||||
void compute_angular_offsets(int samplecount, const float *samples, const float *sample_weights, int max_angular_steps, float *offsets)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
float anglesum_x[ANGULAR_STEPS];
|
||||
float anglesum_y[ANGULAR_STEPS];
|
||||
|
||||
for (i = 0; i < max_angular_steps; i++)
|
||||
{
|
||||
anglesum_x[i] = 0;
|
||||
anglesum_y[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
// compute the angle-sums.
|
||||
for (i = 0; i < samplecount; i++)
|
||||
{
|
||||
float sample = samples[i];
|
||||
float sample_weight = sample_weights[i];
|
||||
if32 p;
|
||||
p.f = (sample * (SINCOS_STEPS - 1.0f)) + 12582912.0f;
|
||||
unsigned int isample = p.u & 0x3F;
|
||||
|
||||
const float *sinptr = sin_table[isample];
|
||||
const float *cosptr = cos_table[isample];
|
||||
|
||||
for (j = 0; j < max_angular_steps; j++)
|
||||
{
|
||||
float cp = cosptr[j];
|
||||
float sp = sinptr[j];
|
||||
|
||||
anglesum_x[j] += cp * sample_weight;
|
||||
anglesum_y[j] += sp * sample_weight;
|
||||
}
|
||||
}
|
||||
|
||||
// post-process the angle-sums
|
||||
for (i = 0; i < max_angular_steps; i++)
|
||||
{
|
||||
float angle = atan2(anglesum_y[i], anglesum_x[i]); // positive angle -> positive offset
|
||||
offsets[i] = angle * (stepsizes[i] * (1.0f / (2.0f * (float)M_PI)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// for a given step-size and a given offset, compute the
|
||||
// lowest and highest weight that results from quantizing using the stepsize & offset.
|
||||
// also, compute the resulting error.
|
||||
|
||||
|
||||
/* static inline */
|
||||
void compute_lowest_and_highest_weight(int samplecount, const float *samples, const float *sample_weights,
|
||||
int max_angular_steps, const float *offsets,
|
||||
int8_t * lowest_weight, int8_t * highest_weight,
|
||||
float *error, float *cut_low_weight_error, float *cut_high_weight_error)
|
||||
{
|
||||
int i;
|
||||
|
||||
int sp;
|
||||
|
||||
float error_from_forcing_weight_down[60];
|
||||
float error_from_forcing_weight_either_way[60];
|
||||
for (i = 0; i < 60; i++)
|
||||
{
|
||||
error_from_forcing_weight_down[i] = 0;
|
||||
error_from_forcing_weight_either_way[i] = 0;
|
||||
}
|
||||
|
||||
// weight + 12
|
||||
static const unsigned int idxtab[256] = {
|
||||
|
||||
12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27,
|
||||
28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 42, 43,
|
||||
44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 8, 9, 10, 11,
|
||||
|
||||
12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27,
|
||||
28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 42, 43,
|
||||
44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 2, 3,
|
||||
4, 5, 6, 7, 8, 9, 10, 11
|
||||
};
|
||||
|
||||
|
||||
|
||||
for (sp = 0; sp < max_angular_steps; sp++)
|
||||
{
|
||||
unsigned int minidx_bias12 = 55;
|
||||
unsigned int maxidx_bias12 = 0;
|
||||
|
||||
float errval = 0.0f;
|
||||
|
||||
float rcp_stepsize = angular_steppings[sp];
|
||||
float offset = offsets[sp];
|
||||
|
||||
float scaled_offset = rcp_stepsize * offset;
|
||||
|
||||
|
||||
for (i = 0; i < samplecount - 1; i += 2)
|
||||
{
|
||||
float wt1 = sample_weights[i];
|
||||
float wt2 = sample_weights[i + 1];
|
||||
if32 p1, p2;
|
||||
float sval1 = (samples[i] * rcp_stepsize) - scaled_offset;
|
||||
float sval2 = (samples[i + 1] * rcp_stepsize) - scaled_offset;
|
||||
p1.f = sval1 + 12582912.0f; // FP representation abuse to avoid floor() and float->int conversion
|
||||
p2.f = sval2 + 12582912.0f; // FP representation abuse to avoid floor() and float->int conversion
|
||||
float isval1 = p1.f - 12582912.0f;
|
||||
float isval2 = p2.f - 12582912.0f;
|
||||
float dif1 = sval1 - isval1;
|
||||
float dif2 = sval2 - isval2;
|
||||
|
||||
errval += (dif1 * wt1) * dif1;
|
||||
errval += (dif2 * wt2) * dif2;
|
||||
|
||||
// table lookups that really perform a minmax function.
|
||||
unsigned int idx1_bias12 = idxtab[p1.u & 0xFF];
|
||||
unsigned int idx2_bias12 = idxtab[p2.u & 0xFF];
|
||||
|
||||
if (idx1_bias12 < minidx_bias12)
|
||||
minidx_bias12 = idx1_bias12;
|
||||
if (idx1_bias12 > maxidx_bias12)
|
||||
maxidx_bias12 = idx1_bias12;
|
||||
if (idx2_bias12 < minidx_bias12)
|
||||
minidx_bias12 = idx2_bias12;
|
||||
if (idx2_bias12 > maxidx_bias12)
|
||||
maxidx_bias12 = idx2_bias12;
|
||||
|
||||
error_from_forcing_weight_either_way[idx1_bias12] += wt1;
|
||||
error_from_forcing_weight_down[idx1_bias12] += (dif1 * wt1);
|
||||
|
||||
error_from_forcing_weight_either_way[idx2_bias12] += wt2;
|
||||
error_from_forcing_weight_down[idx2_bias12] += (dif2 * wt2);
|
||||
}
|
||||
|
||||
if (samplecount & 1)
|
||||
{
|
||||
i = samplecount - 1;
|
||||
float wt = sample_weights[i];
|
||||
if32 p;
|
||||
float sval = (samples[i] * rcp_stepsize) - scaled_offset;
|
||||
p.f = sval + 12582912.0f; // FP representation abuse to avoid floor() and float->int conversion
|
||||
float isval = p.f - 12582912.0f;
|
||||
float dif = sval - isval;
|
||||
|
||||
errval += (dif * wt) * dif;
|
||||
|
||||
unsigned int idx_bias12 = idxtab[p.u & 0xFF];
|
||||
|
||||
if (idx_bias12 < minidx_bias12)
|
||||
minidx_bias12 = idx_bias12;
|
||||
if (idx_bias12 > maxidx_bias12)
|
||||
maxidx_bias12 = idx_bias12;
|
||||
|
||||
error_from_forcing_weight_either_way[idx_bias12] += wt;
|
||||
error_from_forcing_weight_down[idx_bias12] += dif * wt;
|
||||
}
|
||||
|
||||
|
||||
lowest_weight[sp] = (int)minidx_bias12 - 12;
|
||||
highest_weight[sp] = (int)maxidx_bias12 - 12;
|
||||
error[sp] = errval;
|
||||
|
||||
// the cut_(lowest/highest)_weight_error indicate the error that results from
|
||||
// forcing samples that should have had the (lowest/highest) weight value
|
||||
// one step (up/down).
|
||||
cut_low_weight_error[sp] = error_from_forcing_weight_either_way[minidx_bias12] - 2.0f * error_from_forcing_weight_down[minidx_bias12];
|
||||
cut_high_weight_error[sp] = error_from_forcing_weight_either_way[maxidx_bias12] + 2.0f * error_from_forcing_weight_down[maxidx_bias12];
|
||||
|
||||
// clear out the error-from-forcing values we actually used in this pass
|
||||
// so that these are clean for the next pass.
|
||||
unsigned int ui;
|
||||
for (ui = minidx_bias12 & ~0x3; ui <= maxidx_bias12; ui += 4)
|
||||
{
|
||||
error_from_forcing_weight_either_way[ui] = 0;
|
||||
error_from_forcing_weight_down[ui] = 0;
|
||||
error_from_forcing_weight_either_way[ui + 1] = 0;
|
||||
error_from_forcing_weight_down[ui + 1] = 0;
|
||||
error_from_forcing_weight_either_way[ui + 2] = 0;
|
||||
error_from_forcing_weight_down[ui + 2] = 0;
|
||||
error_from_forcing_weight_either_way[ui + 3] = 0;
|
||||
error_from_forcing_weight_down[ui + 3] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (sp = 0; sp < max_angular_steps; sp++)
|
||||
{
|
||||
float errscale = stepsizes_sqr[sp];
|
||||
error[sp] *= errscale;
|
||||
cut_low_weight_error[sp] *= errscale;
|
||||
cut_high_weight_error[sp] *= errscale;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// main function for running the angular algorithm.
|
||||
|
||||
|
||||
void compute_angular_endpoints_for_quantization_levels(int samplecount, const float *samples, const float *sample_weights, int max_quantization_level, float low_value[12], float high_value[12])
|
||||
{
|
||||
int i;
|
||||
|
||||
|
||||
max_quantization_level++; // Temporarily increase level - needs refinement
|
||||
|
||||
static const int quantization_steps_for_level[13] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 };
|
||||
int max_quantization_steps = quantization_steps_for_level[max_quantization_level];
|
||||
|
||||
float offsets[ANGULAR_STEPS];
|
||||
|
||||
int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quantization_level];
|
||||
|
||||
compute_angular_offsets(samplecount, samples, sample_weights, max_angular_steps, offsets);
|
||||
|
||||
|
||||
// the +4 offsets are to allow for vectorization within compute_lowest_and_highest_weight().
|
||||
int8_t lowest_weight[ANGULAR_STEPS + 4];
|
||||
int8_t highest_weight[ANGULAR_STEPS + 4];
|
||||
float error[ANGULAR_STEPS + 4];
|
||||
|
||||
float cut_low_weight_error[ANGULAR_STEPS + 4];
|
||||
float cut_high_weight_error[ANGULAR_STEPS + 4];
|
||||
|
||||
compute_lowest_and_highest_weight(samplecount, samples, sample_weights, max_angular_steps, offsets, lowest_weight, highest_weight, error, cut_low_weight_error, cut_high_weight_error);
|
||||
|
||||
|
||||
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
||||
if (print_diagnostics)
|
||||
{
|
||||
printf("%s : max-angular-steps=%d \n", __func__, max_angular_steps);
|
||||
printf("Samplecount=%d, max_quantization_level=%d\n", samplecount, max_quantization_level);
|
||||
for (i = 0; i < samplecount; i++)
|
||||
printf("Sample %d : %f (weight %f)\n", i, samples[i], sample_weights[i]);
|
||||
|
||||
for (i = 0; i < max_angular_steps; i++)
|
||||
{
|
||||
printf("%d: offset=%f error=%f lowest=%d highest=%d cl=%f ch=%f\n", i, offsets[i], error[i], lowest_weight[i], highest_weight[i], cut_low_weight_error[i], cut_high_weight_error[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// for each quantization level, find the best error terms.
|
||||
float best_errors[40];
|
||||
int best_scale[40];
|
||||
uint8_t cut_low_weight[40];
|
||||
|
||||
for (i = 0; i < (max_quantization_steps + 4); i++)
|
||||
{
|
||||
best_errors[i] = 1e30f;
|
||||
best_scale[i] = -1; // Indicates no solution found
|
||||
cut_low_weight[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
for (i = 0; i < max_angular_steps; i++)
|
||||
{
|
||||
int samplecount = highest_weight[i] - lowest_weight[i] + 1;
|
||||
if (samplecount >= (max_quantization_steps + 4))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (samplecount < 2)
|
||||
samplecount = 2;
|
||||
|
||||
if (best_errors[samplecount] > error[i])
|
||||
{
|
||||
best_errors[samplecount] = error[i];
|
||||
best_scale[samplecount] = i;
|
||||
cut_low_weight[samplecount] = 0;
|
||||
}
|
||||
|
||||
float error_cut_low = error[i] + cut_low_weight_error[i];
|
||||
float error_cut_high = error[i] + cut_high_weight_error[i];
|
||||
float error_cut_low_high = error[i] + cut_low_weight_error[i] + cut_high_weight_error[i];
|
||||
|
||||
if (best_errors[samplecount - 1] > error_cut_low)
|
||||
{
|
||||
best_errors[samplecount - 1] = error_cut_low;
|
||||
best_scale[samplecount - 1] = i;
|
||||
cut_low_weight[samplecount - 1] = 1;
|
||||
}
|
||||
|
||||
if (best_errors[samplecount - 1] > error_cut_high)
|
||||
{
|
||||
best_errors[samplecount - 1] = error_cut_high;
|
||||
best_scale[samplecount - 1] = i;
|
||||
cut_low_weight[samplecount - 1] = 0;
|
||||
}
|
||||
|
||||
if (best_errors[samplecount - 2] > error_cut_low_high)
|
||||
{
|
||||
best_errors[samplecount - 2] = error_cut_low_high;
|
||||
best_scale[samplecount - 2] = i;
|
||||
cut_low_weight[samplecount - 2] = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// if we got a better error-value for a low sample count than for a high one,
|
||||
// use the low sample count error value for the higher sample count as well.
|
||||
for (i = 3; i <= max_quantization_steps; i++)
|
||||
{
|
||||
if (best_errors[i] > best_errors[i - 1])
|
||||
{
|
||||
best_errors[i] = best_errors[i - 1];
|
||||
best_scale[i] = best_scale[i - 1];
|
||||
cut_low_weight[i] = cut_low_weight[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
max_quantization_level--; // Decrease level again (see corresponding ++, above)
|
||||
|
||||
static const int ql_weights[12] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33 };
|
||||
for (i = 0; i <= max_quantization_level; i++)
|
||||
{
|
||||
int q = ql_weights[i];
|
||||
int bsi = best_scale[q];
|
||||
|
||||
// Did we find anything?
|
||||
if(bsi < 0)
|
||||
{
|
||||
printf("ERROR: Unable to find an encoding within the specified error limits. Please revise the error limit values and try again.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
float stepsize = stepsizes[bsi];
|
||||
int lwi = lowest_weight[bsi] + cut_low_weight[q];
|
||||
int hwi = lwi + q - 1;
|
||||
float offset = offsets[bsi];
|
||||
|
||||
low_value[i] = offset + lwi * stepsize;
|
||||
high_value[i] = offset + hwi * stepsize;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// helper functions that will compute ideal angular-endpoints
|
||||
// for a given set of weights and a given block size descriptors
|
||||
|
||||
void compute_angular_endpoints_1plane(float mode_cutoff, const block_size_descriptor * bsd,
|
||||
const float *decimated_quantized_weights, const float *decimated_weights,
|
||||
float low_value[MAX_WEIGHT_MODES], float high_value[MAX_WEIGHT_MODES])
|
||||
{
|
||||
int i;
|
||||
float low_values[MAX_DECIMATION_MODES][12];
|
||||
float high_values[MAX_DECIMATION_MODES][12];
|
||||
|
||||
for (i = 0; i < MAX_DECIMATION_MODES; i++)
|
||||
{
|
||||
int samplecount = bsd->decimation_mode_samples[i];
|
||||
int quant_mode = bsd->decimation_mode_maxprec_1plane[i];
|
||||
float percentile = bsd->decimation_mode_percentile[i];
|
||||
int permit_encode = bsd->permit_encode[i];
|
||||
if (permit_encode == 0 || samplecount < 1 || quant_mode < 0 || percentile > mode_cutoff)
|
||||
continue;
|
||||
|
||||
|
||||
compute_angular_endpoints_for_quantization_levels(samplecount,
|
||||
decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK,
|
||||
decimated_weights + i * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values[i], high_values[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_WEIGHT_MODES; i++)
|
||||
{
|
||||
if (bsd->block_modes[i].is_dual_plane != 0 || bsd->block_modes[i].percentile > mode_cutoff)
|
||||
continue;
|
||||
int quant_mode = bsd->block_modes[i].quantization_mode;
|
||||
int decim_mode = bsd->block_modes[i].decimation_mode;
|
||||
|
||||
low_value[i] = low_values[decim_mode][quant_mode];
|
||||
high_value[i] = high_values[decim_mode][quant_mode];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void compute_angular_endpoints_2planes(float mode_cutoff,
|
||||
const block_size_descriptor * bsd,
|
||||
const float *decimated_quantized_weights,
|
||||
const float *decimated_weights,
|
||||
float low_value1[MAX_WEIGHT_MODES], float high_value1[MAX_WEIGHT_MODES], float low_value2[MAX_WEIGHT_MODES], float high_value2[MAX_WEIGHT_MODES])
|
||||
{
|
||||
int i;
|
||||
float low_values1[MAX_DECIMATION_MODES][12];
|
||||
float high_values1[MAX_DECIMATION_MODES][12];
|
||||
float low_values2[MAX_DECIMATION_MODES][12];
|
||||
float high_values2[MAX_DECIMATION_MODES][12];
|
||||
|
||||
for (i = 0; i < MAX_DECIMATION_MODES; i++)
|
||||
{
|
||||
int samplecount = bsd->decimation_mode_samples[i];
|
||||
int quant_mode = bsd->decimation_mode_maxprec_2planes[i];
|
||||
float percentile = bsd->decimation_mode_percentile[i];
|
||||
int permit_encode = bsd->permit_encode[i];
|
||||
if (permit_encode == 0 || samplecount < 1 || quant_mode < 0 || percentile > mode_cutoff)
|
||||
continue;
|
||||
|
||||
compute_angular_endpoints_for_quantization_levels(samplecount,
|
||||
decimated_quantized_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK,
|
||||
decimated_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values1[i], high_values1[i]);
|
||||
|
||||
compute_angular_endpoints_for_quantization_levels(samplecount,
|
||||
decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK,
|
||||
decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values2[i], high_values2[i]);
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_WEIGHT_MODES; i++)
|
||||
{
|
||||
if (bsd->block_modes[i].is_dual_plane != 1 || bsd->block_modes[i].percentile > mode_cutoff)
|
||||
continue;
|
||||
int quant_mode = bsd->block_modes[i].quantization_mode;
|
||||
int decim_mode = bsd->block_modes[i].decimation_mode;
|
||||
|
||||
low_value1[i] = low_values1[decim_mode][quant_mode];
|
||||
high_value1[i] = high_values1[decim_mode][quant_mode];
|
||||
low_value2[i] = low_values2[decim_mode][quant_mode];
|
||||
high_value2[i] = high_values2[decim_mode][quant_mode];
|
||||
}
|
||||
}
|
||||
1003
3rdparty/astc/astc_weight_quant_xfer_tables.cpp
vendored
Normal file
1003
3rdparty/astc/astc_weight_quant_xfer_tables.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
137
3rdparty/astc/license.txt
vendored
Normal file
137
3rdparty/astc/license.txt
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
END USER LICENCE AGREEMENT FOR THE MALI ASTC SPECIFICATION AND SOFTWARE CODEC,
|
||||
VERSION: 1.3
|
||||
|
||||
THIS END USER LICENCE AGREEMENT ("LICENCE") IS A LEGAL AGREEMENT BETWEEN YOU
|
||||
(EITHER A SINGLE INDIVIDUAL, OR SINGLE LEGAL ENTITY) AND ARM LIMITED ("ARM")
|
||||
FOR THE USE OF THE SOFTWARE ACCOMPANYING THIS LICENCE. ARM IS ONLY WILLING
|
||||
TO LICENSE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS
|
||||
IN THIS LICENCE. BY CLICKING "I AGREE" OR BY INSTALLING OR OTHERWISE USING
|
||||
OR COPYING THE SOFTWARE YOU INDICATE THAT YOU AGREE TO BE BOUND BY ALL THE
|
||||
TERMS OF THIS LICENCE.
|
||||
|
||||
IF YOU DO NOT AGREE TO THE TERMS OF THIS LICENCE, ARM IS UNWILLING TO LICENSE
|
||||
THE SOFTWARE TO YOU AND YOU MAY NOT INSTALL, USE OR COPY THE SOFTWARE.
|
||||
|
||||
1. DEFINITIONS.
|
||||
|
||||
"Authorised Purpose" means the use of the Software solely to develop products
|
||||
and tools which implement the Khronos ASTC specification to;
|
||||
(i) compress texture images into ASTC format ("Compression Results");
|
||||
(ii) distribute such Compression Results to third parties; and
|
||||
(iii) decompress texture images stored in ASTC format.
|
||||
|
||||
"Software" means the source code and Software binaries accompanying this
|
||||
Licence, and any printed, electronic or online documentation supplied with it,
|
||||
in all cases relating to the MALI ASTC SPECIFICATION AND SOFTWARE CODEC.
|
||||
|
||||
2. LICENCE GRANT.
|
||||
|
||||
ARM hereby grants to you, subject to the terms and conditions of this Licence,
|
||||
a nonexclusive, nontransferable, free of charge, royalty free, worldwide
|
||||
licence to use, copy, modify and (subject to Clause 3 below) distribute the
|
||||
Software solely for the Authorised Purpose.
|
||||
|
||||
No right is granted to use the Software to develop hardware.
|
||||
|
||||
Notwithstanding the foregoing, nothing in this Licence prevents you from
|
||||
using the Software to develop products that conform to an application
|
||||
programming interface specification issued by The Khronos Group Inc.
|
||||
("Khronos"), provided that you have licences to develop such products
|
||||
under the relevant Khronos agreements.
|
||||
|
||||
3. RESTRICTIONS ON USE OF THE SOFTWARE.
|
||||
|
||||
RESTRICTIONS ON TRANSFER OF LICENSED RIGHTS: The rights granted to you under
|
||||
this Licence may not be assigned by you to any third party without the prior
|
||||
written consent of ARM.
|
||||
|
||||
TITLE AND RESERVATION OF RIGHTS: You acquire no rights to the Software other
|
||||
than as expressly provided by this Licence. The Software is licensed not sold.
|
||||
ARM does not transfer title to the Software to you. In no event shall the
|
||||
licences granted in Clause 2 be construed as granting you expressly or by
|
||||
implication, estoppel or otherwise, licences to any ARM technology other than
|
||||
the Software.
|
||||
|
||||
NOTICES: You shall not remove from the Software any copyright notice or other
|
||||
notice (whether ARM's or its licensor's), and you shall ensure that any such
|
||||
notice is reproduced in any copies of the whole or any part of the Software
|
||||
made by you. You shall not use ARM's or its licensor's name, logo or
|
||||
trademarks to market Compression Results. If you distribute the Software to a
|
||||
third party, you agree to include a copy of this Licence with such
|
||||
distribution.
|
||||
|
||||
4. NO SUPPORT.
|
||||
|
||||
ARM has no obligation to support or to continue providing or updating any of
|
||||
the Software.
|
||||
|
||||
5. NO WARRANTIES.
|
||||
|
||||
YOU AGREE THAT THE SOFTWARE IS LICENSED "AS IS", AND THAT ARM EXPRESSLY
|
||||
DISCLAIMS ALL REPRESENTATIONS, WARRANTIES, CONDITIONS OR OTHER TERMS, EXPRESS,
|
||||
IMPLIED OR STATUTORY, TO THE FULLEST EXTENT PERMITTED BY LAW. YOU EXPRESSLY
|
||||
ASSUME ALL LIABILITIES AND RISKS, FOR USE OR OPERATION OF ANY APPLICATION
|
||||
PROGRAMS YOU CREATE WITH THE SOFTWARE, AND YOU ASSUME THE ENTIRE COST OF ALL
|
||||
NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
6. LIMITATION OF LIABILITY.
|
||||
|
||||
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL ARM BE
|
||||
LIABLE FOR ANY INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING LOSS OF PROFITS) ARISING OUT OF THE USE OR INABILITY TO USE THE
|
||||
SOFTWARE WHETHER BASED ON A CLAIM UNDER CONTRACT, TORT OR OTHER LEGAL THEORY,
|
||||
EVEN IF ARM WAS ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
ARM does not seek to limit or exclude liability for death or personal injury
|
||||
arising from ARM's negligence and because some jurisdictions do not permit the
|
||||
exclusion or limitation of liability for consequential or incidental damages
|
||||
the above limitation relating to liability for consequential damages may not
|
||||
apply to you.
|
||||
|
||||
NOTWITHSTANDING ANYTHING TO THE CONTRARY CONTAINED IN THIS LICENCE, THE
|
||||
MAXIMUM LIABILITY OF ARM TO YOU IN AGGREGATE FOR ALL CLAIMS MADE AGAINST ARM
|
||||
IN CONTRACT TORT OR OTHERWISE UNDER OR IN CONNECTION WITH THE SUBJECT MATTER
|
||||
OF THIS LICENCE SHALL NOT EXCEED THE GREATER OF THE TOTAL OF SUMS PAID BY YOU
|
||||
TO ARM (IF ANY) FOR THIS LICENCE AND US$5.00.
|
||||
|
||||
7. U.S. GOVERNMENT END USERS.
|
||||
|
||||
US Government Restrictions: Use, duplication, reproduction, release,
|
||||
modification, disclosure or transfer of this commercial product and
|
||||
accompanying documentation is restricted in accordance with the terms
|
||||
of this Licence.
|
||||
|
||||
8. TERM AND TERMINATION.
|
||||
|
||||
This Licence shall remain in force until terminated by you or by ARM. Without
|
||||
prejudice to any of its other rights if you are in breach of any of the terms
|
||||
and conditions of this Licence then ARM may terminate this Licence immediately
|
||||
upon giving written notice to you. You may terminate this Licence at any time.
|
||||
|
||||
Upon termination of this Licence by you or by ARM you shall stop using the
|
||||
Software and destroy all copies of the Software in your possession together
|
||||
with all documentation and related materials. The provisions of Clauses 1, 3,
|
||||
4, 5, 6, 7, 8 and 9 shall survive termination of this Licence.
|
||||
|
||||
9. GENERAL.
|
||||
|
||||
This Licence is governed by English Law. Except where ARM agrees otherwise in
|
||||
a written contract signed by you and ARM, this is the only agreement between
|
||||
you and ARM relating to the Software and it may only be modified by written
|
||||
agreement between you and ARM. Except as expressly agreed in writing, this
|
||||
Licence may not be modified by purchase orders, advertising or other
|
||||
representation by any person. If any clause in this Licence is held by a court
|
||||
of law to be illegal or unenforceable the remaining provisions of this Licence
|
||||
shall not be affected thereby. The failure by ARM to enforce any of the
|
||||
provisions of this Licence, unless waived in writing, shall not constitute a
|
||||
waiver of ARM's rights to enforce such provision or any other provision of
|
||||
this Licence in the future.
|
||||
|
||||
You agree to comply fully with all laws and regulations of the United States
|
||||
and other countries ("Export Laws") to assure that the Software is not;
|
||||
(1) exported, directly or indirectly, in violation of Export Laws, either to
|
||||
any countries that are subject to U.S.A. export restrictions or to any end
|
||||
user who has been prohibited from participating in the U.S.A. export
|
||||
transactions by any federal agency of the U.S.A. government; or
|
||||
(2) intended to be used for any purpose prohibited by Export Laws, including,
|
||||
without limitation, nuclear, chemical, or biological weapons proliferation.
|
||||
772
3rdparty/astc/mathlib.cpp
vendored
Normal file
772
3rdparty/astc/mathlib.cpp
vendored
Normal file
@@ -0,0 +1,772 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Library of math functions.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "mathlib.h"
|
||||
|
||||
/**************************
|
||||
basic OpenCL functions
|
||||
**************************/
|
||||
|
||||
// Returns 1 / sqrt(p) in single precision.
// sqrtf is used explicitly so the computation never silently goes through
// double, regardless of which sqrt overloads <math.h> exposes.
float inversesqrt(float p)
{
	return 1.0f / sqrtf(p);
}
|
||||
// Returns acos(p) / pi, i.e. the arc cosine expressed in half-turns.
// (The stray ';' that followed the function body has been removed.)
float acospi(float p)
{
	return static_cast < float >(acos(p) * (1.0f / M_PI));
}
|
||||
// Returns sin(p * pi); the product and the sine are evaluated in double.
float sinpi(float p)
{
	const double radians = p * M_PI;
	return static_cast < float >(sin(radians));
}
|
||||
// Returns cos(p * pi); the product and the cosine are evaluated in double.
float cospi(float p)
{
	const double radians = p * M_PI;
	return static_cast < float >(cos(radians));
}
|
||||
|
||||
|
||||
// Builds a float whose bit pattern is p OR-ed with 0x7FC00000 and returns it.
// The bits are reinterpreted through a union of int and float.
float nan(int p)
{
	union
	{
		int bits;
		float value;
	} pun;

	pun.bits = p | 0x7FC00000U;
	return pun.value;
}
|
||||
|
||||
#if !defined(_MSC_VER) && (__cplusplus < 201103L)
// Scalar fmax/fmin, compiled only for non-MSVC builds older than C++11.
// If exactly one argument is NaN, the other argument is returned.
float fmax(float p, float q)
{
	if (p != p)
		return q;
	if (q != q)
		return p;
	return (p > q) ? p : q;
}

float fmin(float p, float q)
{
	if (p != p)
		return q;
	if (q != q)
		return p;
	return (p < q) ? p : q;
}
#endif // C++11
|
||||
|
||||
// Component-wise maximum of two 2-vectors.
float2 fmax(float2 p, float2 q)
{
	const float max_x = fmax(p.x, q.x);
	const float max_y = fmax(p.y, q.y);
	return float2(max_x, max_y);
}
|
||||
|
||||
// Component-wise maximum of two 3-vectors.
float3 fmax(float3 p, float3 q)
{
	const float max_x = fmax(p.x, q.x);
	const float max_y = fmax(p.y, q.y);
	const float max_z = fmax(p.z, q.z);
	return float3(max_x, max_y, max_z);
}
|
||||
|
||||
// Component-wise maximum of two 4-vectors.
float4 fmax(float4 p, float4 q)
{
	const float max_x = fmax(p.x, q.x);
	const float max_y = fmax(p.y, q.y);
	const float max_z = fmax(p.z, q.z);
	const float max_w = fmax(p.w, q.w);
	return float4(max_x, max_y, max_z, max_w);
}
|
||||
|
||||
|
||||
// Component-wise minimum of two 2-vectors.
float2 fmin(float2 p, float2 q)
{
	const float min_x = fmin(p.x, q.x);
	const float min_y = fmin(p.y, q.y);
	return float2(min_x, min_y);
}
|
||||
|
||||
// Component-wise minimum of two 3-vectors.
float3 fmin(float3 p, float3 q)
{
	const float min_x = fmin(p.x, q.x);
	const float min_y = fmin(p.y, q.y);
	const float min_z = fmin(p.z, q.z);
	return float3(min_x, min_y, min_z);
}
|
||||
|
||||
// Component-wise minimum of two 4-vectors.
float4 fmin(float4 p, float4 q)
{
	const float min_x = fmin(p.x, q.x);
	const float min_y = fmin(p.y, q.y);
	const float min_z = fmin(p.z, q.z);
	const float min_w = fmin(p.w, q.w);
	return float4(min_x, min_y, min_z, min_w);
}
|
||||
|
||||
/*
|
||||
float dot( float2 p, float2 q ) { return p.x*q.x + p.y*q.y; } float dot( float3 p, float3 q ) { return p.x*q.x + p.y*q.y + p.z*q.z; } float dot( float4 p, float4 q ) { return p.x*q.x + p.y*q.y +
|
||||
p.z*q.z + p.w*q.w; } */
|
||||
|
||||
// 3D cross product p x q, written with swizzles.
float3 cross(float3 p, float3 q)
{
	float3 result = p.yzx * q.zxy - p.zxy * q.yzx;
	return result;
}
|
||||
|
||||
// Cross product of the xyz parts of p and q; the w component of the result is 0.
float4 cross(float4 p, float4 q)
{
	float3 xyz_part = p.yzx * q.zxy - p.zxy * q.yzx;
	return float4(xyz_part, 0.0f);
}
|
||||
|
||||
// Euclidean length of a 2-vector.
float length(float2 p)
{
	const float squared = dot(p, p);
	return sqrt(squared);
}
|
||||
|
||||
// Euclidean length of a 3-vector.
float length(float3 p)
{
	const float squared = dot(p, p);
	return sqrt(squared);
}
|
||||
|
||||
// Euclidean length of a 4-vector.
float length(float4 p)
{
	const float squared = dot(p, p);
	return sqrt(squared);
}
|
||||
|
||||
// Squared length of a 2-vector (dot product with itself).
float length_sqr(float2 p)
{
	const float squared = dot(p, p);
	return squared;
}
|
||||
|
||||
// Squared length of a 3-vector (dot product with itself).
float length_sqr(float3 p)
{
	const float squared = dot(p, p);
	return squared;
}
|
||||
|
||||
// Squared length of a 4-vector (dot product with itself).
float length_sqr(float4 p)
{
	const float squared = dot(p, p);
	return squared;
}
|
||||
|
||||
|
||||
// Euclidean distance between two 2D points.
float distance(float2 p, float2 q)
{
	float2 delta = q - p;
	return length(delta);
}
|
||||
|
||||
// Euclidean distance between two 3D points.
float distance(float3 p, float3 q)
{
	float3 delta = q - p;
	return length(delta);
}
|
||||
|
||||
// Euclidean distance between two 4D points.
float distance(float4 p, float4 q)
{
	float4 delta = q - p;
	return length(delta);
}
|
||||
|
||||
// Squared Euclidean distance between two 2D points.
float distance_sqr(float2 p, float2 q)
{
	float2 delta = q - p;
	return length_sqr(delta);
}
|
||||
|
||||
// Squared Euclidean distance between two 3D points.
float distance_sqr(float3 p, float3 q)
{
	float3 delta = q - p;
	return length_sqr(delta);
}
|
||||
|
||||
// Squared Euclidean distance between two 4D points.
float distance_sqr(float4 p, float4 q)
{
	float4 delta = q - p;
	return length_sqr(delta);
}
|
||||
|
||||
|
||||
// Scales p to unit length. No guard against a zero-length input.
float2 normalize(float2 p)
{
	const float len = length(p);
	return p / len;
}
|
||||
|
||||
// Scales p to unit length. No guard against a zero-length input.
float3 normalize(float3 p)
{
	const float len = length(p);
	return p / len;
}
|
||||
|
||||
// Scales p to unit length. No guard against a zero-length input.
float4 normalize(float4 p)
{
	const float len = length(p);
	return p / len;
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
matrix functions, for 2x2, 3x3 and 4x4 matrices:
|
||||
|
||||
* trace
|
||||
* determinant
|
||||
* transform
|
||||
* inverse
|
||||
* adjugate
|
||||
* characteristic polynomial
|
||||
* eigenvalue
|
||||
* eigenvector
|
||||
|
||||
additionally, root solver
|
||||
for 2nd, 3rd and 4th degree monic polynomials.
|
||||
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
struct mat2 { float2 v[2]; };
|
||||
struct mat3 { float3 v[3]; };
|
||||
struct mat4 { float4 v[4]; };
|
||||
*/
|
||||
|
||||
// Sum of the diagonal elements of a 2x2 matrix.
float trace(mat2 p)
{
	float diag_sum = p.v[0].x;
	diag_sum = diag_sum + p.v[1].y;
	return diag_sum;
}
|
||||
|
||||
// Sum of the diagonal elements of a 3x3 matrix.
float trace(mat3 p)
{
	float diag_sum = p.v[0].x;
	diag_sum = diag_sum + p.v[1].y;
	diag_sum = diag_sum + p.v[2].z;
	return diag_sum;
}
|
||||
|
||||
// Sum of the diagonal elements of a 4x4 matrix.
float trace(mat4 p)
{
	float diag_sum = p.v[0].x;
	diag_sum = diag_sum + p.v[1].y;
	diag_sum = diag_sum + p.v[2].z;
	diag_sum = diag_sum + p.v[3].w;
	return diag_sum;
}
|
||||
|
||||
// Determinant of a 2x2 matrix: main-diagonal product minus anti-diagonal product.
float determinant(mat2 p)
{
	// .x holds v0.x * v1.y, .y holds v0.y * v1.x
	float2 products = p.v[0].xy * p.v[1].yx;
	return products.x - products.y;
}
|
||||
|
||||
// Determinant of a 3x3 matrix as the scalar triple product of its rows.
float determinant(mat3 p)
{
	float3 row12_cross = cross(p.v[1], p.v[2]);
	return dot(p.v[0], row12_cross);
}
|
||||
|
||||
// Determinant of a 4x4 matrix by cofactor expansion along the first row.
float determinant(mat4 p)
{
	// signed 3x3 minors built from rows 1..3, one per omitted column
	const float cof_x = dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw));
	const float cof_y = -dot(p.v[1].xzw, cross(p.v[2].xzw, p.v[3].xzw));
	const float cof_z = dot(p.v[1].xyw, cross(p.v[2].xyw, p.v[3].xyw));
	const float cof_w = -dot(p.v[1].xyz, cross(p.v[2].xyz, p.v[3].xyz));

	return dot(p.v[0], float4(cof_x, cof_y, cof_z, cof_w));
}
|
||||
|
||||
|
||||
/*
|
||||
characteristic polynomials for matrices. These polynomials are monic, meaning that the coefficient of the highest component is 1; this component is omitted. The first component is the constant
|
||||
part. */
|
||||
|
||||
// Monic characteristic polynomial of a 2x2 matrix:
// (constant term, linear coefficient) = (det, -trace).
float2 characteristic_poly(mat2 p)
{
	const float constant_term = determinant(p);
	const float linear_term = -trace(p);
	return float2(constant_term, linear_term);
}
|
||||
|
||||
|
||||
// Monic characteristic polynomial of a 3x3 matrix, constant term first.
float3 characteristic_poly(mat3 p)
{
	// accumulate the 2x2 principal-minor products; .x - .y is their sum
	float2 minors = (p.v[0].xy * p.v[1].yx);
	minors = minors + (p.v[0].xz * p.v[2].zx);
	minors = minors + (p.v[1].yz * p.v[2].zy);

	return float3(-determinant(p), minors.x - minors.y, -trace(p));
}
|
||||
|
||||
|
||||
// Monic characteristic polynomial of a 4x4 matrix, constant term first.
float4 characteristic_poly(mat4 p)
{
	// accumulate the 2x2 principal-minor products; .x - .y is their sum
	float2 minors = (p.v[0].xy * p.v[1].yx);
	minors = minors + (p.v[0].xz * p.v[2].zx);
	minors = minors + (p.v[0].xw * p.v[3].wx);
	minors = minors + (p.v[1].yz * p.v[2].zy);
	minors = minors + (p.v[1].yw * p.v[3].wy);
	minors = minors + (p.v[2].zw * p.v[3].wz);

	// negated sum of the four 3x3 principal minors
	float linear_term = -dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw));
	linear_term = linear_term - dot(p.v[0].xzw, cross(p.v[2].xzw, p.v[3].xzw));
	linear_term = linear_term - dot(p.v[0].xyw, cross(p.v[1].xyw, p.v[3].xyw));
	linear_term = linear_term - dot(p.v[0].xyz, cross(p.v[1].xyz, p.v[2].xyz));

	return float4(determinant(p), linear_term, minors.x - minors.y, -trace(p));
}
|
||||
|
||||
|
||||
/*
|
||||
Root finders for monic polynomials (highest coefficient is equal to 1)
|
||||
|
||||
Returns a vector with length equal to the number of roots that the polynomial has;
|
||||
for roots that do not genuinely exist, we return NaN.
|
||||
|
||||
The polynomial is basically
|
||||
|
||||
poly(n) = p.x + p.y*n + p.z*n^2 + p.w*n^3
|
||||
|
||||
(including only the components of the vector that actually exist; the next coefficient
|
||||
has the value 1, and the remaining ones have value 0. )
|
||||
*/
|
||||
|
||||
|
||||
// Roots of the monic quadratic n^2 + p.y*n + p.x.
// A negative discriminant makes sqrt() return NaN, so both roots come out NaN.
float2 solve_monic(float2 p)
{
	const float discriminant = p.y * p.y - 4 * p.x;
	const float root = sqrt(discriminant);
	return (p.yy + float2(root, -root)) * -0.5f;
}
|
||||
|
||||
// Roots of the monic cubic n^3 + p.z*n^2 + p.y*n + p.x.
// When only one real root exists, the other two components are NaN.
float3 solve_monic(float3 p)
{
	p = p * (1.0f / 3.0f);

	// kept unscaled: subtracted from the rescaled roots at the end
	const float pz = p.z;

	// compute a normalization value to scale the vector by.
	// The normalization factor is divided by 2^20.
	// This is supposed to make internal calculations unlikely
	// to overflow while also making underflows unlikely.
	const float cx = static_cast < float >(cbrt(fabs(p.x)));
	const float cy = static_cast < float >(cbrt(fabs(p.y)));
	const float scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f);
	const float rscal = 1.0f / scal;
	p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal);

	const float bb = p.z * p.z;	// div scal^2

	const float nq = bb - p.y;	// div scal^2
	const float r = 1.5f * (p.y * p.z - p.x) - p.z * bb;	// div scal^3
	const float nq3 = nq * nq * nq;	// div scal^6
	const float r2 = r * r;	// div scal^6

	if (nq3 < r2)
	{
		// one root
		const float root = sqrt(r2 - nq3);	// div scal^3
		const float s = static_cast < float >(cbrt(r + root));	// div scal
		const float t = static_cast < float >(cbrt(r - root));	// div scal
		return float3((s + t) * scal - pz, nan(0), nan(0));
	}

	// three roots
	const float phi_r = inversesqrt(nq3);	// div scal ^ -3
	const float phi_root = static_cast < float >(cbrt(phi_r * nq3));	// div scal
	float theta = acospi(r * phi_r);
	theta *= 1.0f / 3.0f;
	const float ncprod = phi_root * cospi(theta);
	const float dev = 1.73205080756887729353f * phi_root * sinpi(theta);
	return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz;
}
|
||||
|
||||
|
||||
/*
|
||||
* This function is not overflow-safe. Use with care.
|
||||
*/
|
||||
// Roots of the monic quartic n^4 + p.w*n^3 + p.z*n^2 + p.y*n + p.x.
// Roots that do not exist are returned as NaN.
float4 solve_monic(float4 p)
{
	// step 1: depress the input polynomial
	const float bias = p.w * 0.25f;
	float3 qv = float3((-3.0f / 256.0f) * p.w * p.w, (1.0f / 8.0f) * p.w, (-3.0 / 8.0f));
	float3 rv = float3((1.0f / 16.0f) * p.z * p.w - (1.0f / 4.0f) * p.y, (-1.0f / 2.0f) * p.z, 0.0f);
	float3 depressed = float3(qv * p.w + rv) * p.w + p.xyz;

	// step 2: solve a cubic equation to get hold of a parameter p.
	float3 resolvent = float3(-depressed.y * depressed.y, (depressed.z * depressed.z) - (4.0f * depressed.x), 2.0f * depressed.z);
	float4 candidates = float4(solve_monic(resolvent), 1e-37f);

	// At least one root of the cubic should be nonnegative, although roundoff
	// may have pushed it slightly below zero. Taking the maximum over the three
	// roots and the tiny positive fourth component yields that root or, failing
	// that, a very small positive value.
	float2 pair_max = fmax(candidates.xy, candidates.zw);
	const float p2 = fmax(pair_max.x, pair_max.y);	// p^2
	const float pr = inversesqrt(p2);	// 1/p
	const float pm = p2 * pr;	// p

	// step 3: use the solution for the cubic equation to set up two quadratic equations;
	// these two equations then result in the 4 possible roots.
	const float f1 = depressed.z + p2;
	const float f2 = depressed.y * pr;
	const float s = 0.5f * (f1 + f2);
	const float q = 0.5f * (f1 - f2);

	float4 res = float4(solve_monic(float2(q, pm)),
						solve_monic(float2(s, -pm)));

	// finally, order the results and apply the bias:
	// if the first pair is NaN, move the second pair to the front.
	if (res.x != res.x)
		return res.zwxy - bias;
	return res - bias;
}
|
||||
|
||||
|
||||
|
||||
// Matrix-vector product: each result component is dot(row, q).
float2 transform(mat2 p, float2 q)
{
	const float out_x = dot(p.v[0], q);
	const float out_y = dot(p.v[1], q);
	return float2(out_x, out_y);
}
|
||||
|
||||
|
||||
// Matrix-vector product: each result component is dot(row, q).
float3 transform(mat3 p, float3 q)
{
	const float out_x = dot(p.v[0], q);
	const float out_y = dot(p.v[1], q);
	const float out_z = dot(p.v[2], q);
	return float3(out_x, out_y, out_z);
}
|
||||
|
||||
|
||||
// Matrix-vector product: each result component is dot(row, q).
float4 transform(mat4 p, float4 q)
{
	const float out_x = dot(p.v[0], q);
	const float out_y = dot(p.v[1], q);
	const float out_z = dot(p.v[2], q);
	const float out_w = dot(p.v[3], q);
	return float4(out_x, out_y, out_z, out_w);
}
|
||||
|
||||
|
||||
|
||||
// Adjugate of a 2x2 matrix: swap the diagonal, negate the off-diagonal.
mat2 adjugate(mat2 p)
{
	mat2 adj;
	adj.v[1] = float2(-p.v[1].x, p.v[0].x);
	adj.v[0] = float2(p.v[1].y, -p.v[0].y);
	return adj;
}
|
||||
|
||||
|
||||
|
||||
// Inverse of a 2x2 matrix: adjugate scaled by 1/determinant.
// No guard against a singular matrix (zero determinant).
mat2 invert(mat2 p)
{
	const float inv_det = 1.0f / determinant(p);

	mat2 inverse;
	inverse.v[0] = float2(p.v[1].y, -p.v[0].y) * inv_det;
	inverse.v[1] = float2(-p.v[1].x, p.v[0].x) * inv_det;
	return inverse;
}
|
||||
|
||||
|
||||
|
||||
// Adjugate of a 3x3 matrix: the pairwise row cross products become the
// columns of the result.
mat3 adjugate(mat3 p)
{
	float3 c12 = cross(p.v[1], p.v[2]);
	float3 c20 = cross(p.v[2], p.v[0]);
	float3 c01 = cross(p.v[0], p.v[1]);

	mat3 adj;
	adj.v[0] = float3(c12.x, c20.x, c01.x);
	adj.v[1] = float3(c12.y, c20.y, c01.y);
	adj.v[2] = float3(c12.z, c20.z, c01.z);
	return adj;
}
|
||||
|
||||
|
||||
|
||||
// Inverse of a 3x3 matrix: row cross products scaled by 1/determinant and
// written out as columns. No guard against a singular matrix.
mat3 invert(mat3 p)
{
	float3 c12 = cross(p.v[1], p.v[2]);

	// the determinant reuses the first cross product
	const float det = dot(c12, p.v[0]);
	const float inv_det = 1.0f / det;

	float3 col0 = c12 * inv_det;
	float3 col1 = cross(p.v[2], p.v[0]) * inv_det;
	float3 col2 = cross(p.v[0], p.v[1]) * inv_det;

	mat3 inverse;
	inverse.v[0] = float3(col0.x, col1.x, col2.x);
	inverse.v[1] = float3(col0.y, col1.y, col2.y);
	inverse.v[2] = float3(col0.z, col1.z, col2.z);
	return inverse;
}
|
||||
|
||||
|
||||
|
||||
// Adjugate of a 4x4 matrix. For each omitted column, the cross product of the
// bottom two rows and of the top two rows (restricted to the remaining three
// columns) is dotted with the other rows to form the signed 3x3 minors.
mat4 adjugate(mat4 p)
{
	// cross products of the bottom two rows, one per omitted column
	float3 bottom0 = cross(p.v[2].yzw, p.v[3].yzw);
	float3 bottom1 = cross(p.v[2].xzw, p.v[3].xzw);
	float3 bottom2 = cross(p.v[2].xyw, p.v[3].xyw);
	float3 bottom3 = cross(p.v[2].xyz, p.v[3].xyz);

	// cross products of the top two rows, one per omitted column
	float3 top0 = cross(p.v[0].yzw, p.v[1].yzw);
	float3 top1 = cross(p.v[0].xzw, p.v[1].xzw);
	float3 top2 = cross(p.v[0].xyw, p.v[1].xyw);
	float3 top3 = cross(p.v[0].xyz, p.v[1].xyz);

	mat4 adj;
	adj.v[0] = float4(dot(bottom0, p.v[1].yzw), -dot(bottom0, p.v[0].yzw), dot(top0, p.v[3].yzw), -dot(top0, p.v[2].yzw));
	adj.v[1] = float4(-dot(bottom1, p.v[1].xzw), dot(bottom1, p.v[0].xzw), -dot(top1, p.v[3].xzw), dot(top1, p.v[2].xzw));
	adj.v[2] = float4(dot(bottom2, p.v[1].xyw), -dot(bottom2, p.v[0].xyw), dot(top2, p.v[3].xyw), -dot(top2, p.v[2].xyw));
	adj.v[3] = float4(-dot(bottom3, p.v[1].xyz), dot(bottom3, p.v[0].xyz), -dot(top3, p.v[3].xyz), dot(top3, p.v[2].xyz));
	return adj;
}
|
||||
|
||||
|
||||
|
||||
// Inverse of a 4x4 matrix: the adjugate construction with every row scaled by
// 1/determinant. No guard against a singular matrix.
mat4 invert(mat4 p)
{
	// cross products between the bottom two rows
	float3 bottom0 = cross(p.v[2].yzw, p.v[3].yzw);
	float3 bottom1 = cross(p.v[2].xzw, p.v[3].xzw);
	float3 bottom2 = cross(p.v[2].xyw, p.v[3].xyw);
	float3 bottom3 = cross(p.v[2].xyz, p.v[3].xyz);

	// first-row cofactors; they also form the first column of the adjugate
	float4 cofactors = float4(dot(bottom0, p.v[1].yzw),
							  -dot(bottom1, p.v[1].xzw),
							  dot(bottom2, p.v[1].xyw),
							  -dot(bottom3, p.v[1].xyz));

	const float det = dot(p.v[0], cofactors);
	const float inv_det = 1.0f / det;

	// cross products between the top two rows
	float3 top0 = cross(p.v[0].yzw, p.v[1].yzw);
	float3 top1 = cross(p.v[0].xzw, p.v[1].xzw);
	float3 top2 = cross(p.v[0].xyw, p.v[1].xyw);
	float3 top3 = cross(p.v[0].xyz, p.v[1].xyz);

	mat4 inverse;
	inverse.v[0] = float4(cofactors.x, -dot(bottom0, p.v[0].yzw), dot(top0, p.v[3].yzw), -dot(top0, p.v[2].yzw)) * inv_det;
	inverse.v[1] = float4(cofactors.y, dot(bottom1, p.v[0].xzw), -dot(top1, p.v[3].xzw), dot(top1, p.v[2].xzw)) * inv_det;
	inverse.v[2] = float4(cofactors.z, -dot(bottom2, p.v[0].xyw), dot(top2, p.v[3].xyw), -dot(top2, p.v[2].xyw)) * inv_det;
	inverse.v[3] = float4(cofactors.w, dot(bottom3, p.v[0].xyz), -dot(top3, p.v[3].xyz), dot(top3, p.v[2].xyz)) * inv_det;
	return inverse;
}
|
||||
|
||||
|
||||
|
||||
// Eigenvalues of a 2x2 matrix: roots of its characteristic polynomial.
float2 eigenvalues(mat2 p)
{
	float2 poly = characteristic_poly(p);
	return solve_monic(poly);
}
|
||||
|
||||
// Eigenvalues of a 3x3 matrix: roots of its characteristic polynomial.
float3 eigenvalues(mat3 p)
{
	float3 poly = characteristic_poly(p);
	return solve_monic(poly);
}
|
||||
|
||||
// Eigenvalues of a 4x4 matrix: roots of its characteristic polynomial.
float4 eigenvalues(mat4 p)
{
	float4 poly = characteristic_poly(p);
	return solve_monic(poly);
}
|
||||
|
||||
// Eigenvector of a 2x2 matrix for the eigenvalue eigvl (not normalized).
float2 eigenvector(mat2 p, float eigvl)
{
	// Pack both rows into one float4 and reverse-subtract the eigenvalue from
	// the diagonal entries (x and w).
	float4 v = float4(p.v[0], p.v[1]);
	v.xw = eigvl - v.xw;

	// Use whichever row has the larger sum of absolute values.
	const bool first_row_dominates = fabs(v.x) + fabs(v.y) > fabs(v.z) + fabs(v.w);
	if (first_row_dominates)
		return v.yx;
	return v.wz;
}
|
||||
|
||||
|
||||
// Eigenvector of a 3x3 matrix for the eigenvalue eigvl (not normalized):
//   1. subtract the eigenvalue from the matrix diagonal,
//   2. take two cross products between rows of the result,
//   3. return the longer of the two.
float3 eigenvector(mat3 p, float eigvl)
{
	float3 row0 = p.v[0];
	float3 row1 = p.v[1];
	float3 row2 = p.v[2];

	row0.x = row0.x - eigvl;
	row1.y = row1.y - eigvl;
	row2.z = row2.z - eigvl;

	float3 cand_a = cross(row0, row1);
	float3 cand_b = cross(row1, row2);

	// compare squared lengths; ties go to the second candidate
	const float len_a = dot(cand_a, cand_a);
	const float len_b = dot(cand_b, cand_b);
	if (len_a > len_b)
		return cand_a;
	return cand_b;
}
|
||||
|
||||
|
||||
// generalized cross product: 3 vectors with 4 components each.
|
||||
// The result is a vector that is perpendicular to all the three specified vectors.
|
||||
|
||||
// it works in the sense that it produces a perpendicular-to-everything vector,
|
||||
// but it has not been tested whether it points in the "right" direction.
|
||||
// Generalized cross product of three 4-vectors: each component is a signed
// 3x3 determinant formed by leaving out one coordinate.
float4 gcross(float4 p, float4 q, float4 r)
{
	const float out_x = dot(p.yzw, cross(q.yzw, r.yzw));
	const float out_y = -dot(p.xzw, cross(q.xzw, r.xzw));
	const float out_z = dot(p.xyw, cross(q.xyw, r.xyw));
	const float out_w = -dot(p.xyz, cross(q.xyz, r.xyz));
	return float4(out_x, out_y, out_z, out_w);
}
|
||||
|
||||
|
||||
|
||||
// Eigenvector of a 4x4 matrix for the eigenvalue eigvl (not normalized).
// After subtracting eigvl from the diagonal, four candidates are generated
// with generalized cross products of three rows each; they will in general
// point in the same (or the exactly opposite) direction but differ in length.
// The longest candidate is returned.
float4 eigenvector(mat4 p, float eigvl)
{
	float4 row0 = p.v[0];
	float4 row1 = p.v[1];
	float4 row2 = p.v[2];
	float4 row3 = p.v[3];

	row0.x = row0.x - eigvl;
	row1.y = row1.y - eigvl;
	row2.z = row2.z - eigvl;
	row3.w = row3.w - eigvl;

	// cross products of the top two rows, one per omitted coordinate
	float3 top0 = cross(row0.yzw, row1.yzw);
	float3 top1 = cross(row0.xzw, row1.xzw);
	float3 top2 = cross(row0.xyw, row1.xyw);
	float3 top3 = cross(row0.xyz, row1.xyz);

	// candidates from rows (0, 1, 2) and rows (0, 1, 3)
	float4 cand1 = float4(dot(row2.yzw, top0),
						  -dot(row2.xzw, top1),
						  dot(row2.xyw, top2),
						  -dot(row2.xyz, top3));

	float4 cand2 = float4(dot(row3.yzw, top0),
						  -dot(row3.xzw, top1),
						  dot(row3.xyw, top2),
						  -dot(row3.xyz, top3));

	// cross products of the bottom two rows, one per omitted coordinate
	float3 bottom0 = cross(row2.yzw, row3.yzw);
	float3 bottom1 = cross(row2.xzw, row3.xzw);
	float3 bottom2 = cross(row2.xyw, row3.xyw);
	float3 bottom3 = cross(row2.xyz, row3.xyz);

	// candidates from rows (2, 3, 0) and rows (2, 3, 1)
	float4 cand3 = float4(dot(row0.yzw, bottom0),
						  -dot(row0.xzw, bottom1),
						  dot(row0.xyw, bottom2),
						  -dot(row0.xyz, bottom3));

	float4 cand4 = float4(dot(row1.yzw, bottom0),
						  -dot(row1.xzw, bottom1),
						  dot(row1.xyw, bottom2),
						  -dot(row1.xyz, bottom3));

	const float len1 = dot(cand1, cand1);
	const float len2 = dot(cand2, cand2);
	const float len3 = dot(cand3, cand3);
	const float len4 = dot(cand4, cand4);

	// pick the pair holding the overall longest vector, then the longer of that pair
	if (fmax(len1, len2) > fmax(len3, len4))
	{
		if (len1 > len2)
			return cand1;
		return cand2;
	}

	if (len3 > len4)
		return cand3;
	return cand4;
}
|
||||
|
||||
|
||||
// matrix multiply
|
||||
|
||||
// 2x2 matrix product: each result row is a linear combination of b's rows,
// weighted by the matching row of a.
mat2 operator *(mat2 a, mat2 b)
{
	mat2 product;
	for (int row = 0; row < 2; row++)
		product.v[row] = a.v[row].x * b.v[0] + a.v[row].y * b.v[1];
	return product;
}
|
||||
|
||||
// 3x3 matrix product: each result row is a linear combination of b's rows,
// weighted by the matching row of a.
mat3 operator *(mat3 a, mat3 b)
{
	mat3 product;
	for (int row = 0; row < 3; row++)
		product.v[row] = a.v[row].x * b.v[0] + a.v[row].y * b.v[1] + a.v[row].z * b.v[2];
	return product;
}
|
||||
|
||||
// 4x4 matrix product: each result row is a linear combination of b's rows,
// weighted by the matching row of a.
mat4 operator *(mat4 a, mat4 b)
{
	mat4 product;
	for (int row = 0; row < 4; row++)
		product.v[row] = a.v[row].x * b.v[0] + a.v[row].y * b.v[1] + a.v[row].z * b.v[2] + a.v[row].w * b.v[3];
	return product;
}
|
||||
|
||||
|
||||
|
||||
/*************************
|
||||
|
||||
simple geometric functions
|
||||
|
||||
*************************/
|
||||
|
||||
|
||||
// return parameter value for the point on the line closest to the specified point
|
||||
// Parameter t such that line.a + t * line.b is the point on the line closest
// to 'point'. line.b does not need to be normalized.
float param_nearest_on_line(float2 point, line2 line)
{
	const float projection = dot(point - line.a, line.b);
	const float dir_len_sq = dot(line.b, line.b);
	return projection / dir_len_sq;
}
|
||||
|
||||
// 3D: parameter t such that line.a + t * line.b is the point of the line nearest to 'point'.
// No guard against a zero-length direction line.b (the division is performed as-is).
float param_nearest_on_line(float3 point, line3 line)
{
	const float projection = dot(point - line.a, line.b);
	const float directionLengthSqr = dot(line.b, line.b);
	return projection / directionLengthSqr;
}
|
||||
|
||||
// 4D: parameter t such that line.a + t * line.b is the point of the line nearest to 'point'.
// No guard against a zero-length direction line.b (the division is performed as-is).
float param_nearest_on_line(float4 point, line4 line)
{
	const float projection = dot(point - line.a, line.b);
	const float directionLengthSqr = dot(line.b, line.b);
	return projection / directionLengthSqr;
}
|
||||
|
||||
|
||||
// return distance between point and line
|
||||
// 2D: distance from 'point' to its nearest point on the parametric line a + b*t.
float point_line_distance(float2 point, line2 line)
{
	const float t = param_nearest_on_line(point, line);
	return distance(point, line.a + line.b * t);
}
|
||||
|
||||
// 3D: distance from 'point' to its nearest point on the parametric line a + b*t.
float point_line_distance(float3 point, line3 line)
{
	const float t = param_nearest_on_line(point, line);
	return distance(point, line.a + line.b * t);
}
|
||||
|
||||
// 4D: distance from 'point' to its nearest point on the parametric line a + b*t.
float point_line_distance(float4 point, line4 line)
{
	const float t = param_nearest_on_line(point, line);
	return distance(point, line.a + line.b * t);
}
|
||||
|
||||
|
||||
// 2D: distance_sqr() between 'point' and its nearest point on the parametric line a + b*t.
float point_line_distance_sqr(float2 point, line2 line)
{
	const float t = param_nearest_on_line(point, line);
	return distance_sqr(point, line.a + line.b * t);
}
|
||||
|
||||
// 3D: distance_sqr() between 'point' and its nearest point on the parametric line a + b*t.
float point_line_distance_sqr(float3 point, line3 line)
{
	const float t = param_nearest_on_line(point, line);
	return distance_sqr(point, line.a + line.b * t);
}
|
||||
|
||||
// 4D: distance_sqr() between 'point' and its nearest point on the parametric line a + b*t.
float point_line_distance_sqr(float4 point, line4 line)
{
	const float t = param_nearest_on_line(point, line);
	return distance_sqr(point, line.a + line.b * t);
}
|
||||
|
||||
|
||||
|
||||
// distance between plane/hyperplane in 3D and 4D
|
||||
// Projection of (point - plane.root_point) onto plane.normal. The result is signed:
// it is negative when the point lies on the side opposite to the normal.
float point_plane_3d_distance(float3 point, plane_3d plane)
{
	const float signedDistance = dot(point - plane.root_point, plane.normal);
	return signedDistance;
}
|
||||
|
||||
|
||||
// Projection of (point - plane.root_point) onto plane.normal. The result is signed:
// it is negative when the point lies on the side opposite to the normal.
float point_hyperplane_4d_distance(float4 point, hyperplane_4d plane)
{
	const float signedDistance = dot(point - plane.root_point, plane.normal);
	return signedDistance;
}
|
||||
|
||||
|
||||
// helper functions to produce a 3D plane from three points and a 4D hyperplane from four points.
|
||||
// Builds the plane through three points: point0 becomes the root point and the normal is
// the normalized cross product of the two edges leaving point0.
plane_3d generate_plane_from_points(float3 point0, float3 point1, float3 point2)
{
	plane_3d plane;
	plane.normal = normalize(cross(point1 - point0, point2 - point0));
	plane.root_point = point0;
	return plane;
}
|
||||
|
||||
// Builds the hyperplane through four points: point0 becomes the root point and the normal is
// the normalized gcross() of the three edges leaving point0.
hyperplane_4d generate_hyperplane_from_points(float4 point0, float4 point1, float4 point2, float4 point3)
{
	hyperplane_4d plane;
	plane.normal = normalize(gcross(point1 - point0, point2 - point0, point3 - point0));
	plane.root_point = point0;
	return plane;
}
|
||||
|
||||
|
||||
200
3rdparty/astc/mathlib.h
vendored
Normal file
200
3rdparty/astc/mathlib.h
vendored
Normal file
@@ -0,0 +1,200 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012, 2018 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Internal math library declarations for ASTC codec.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef MATHLIB_H_INCLUDED
|
||||
|
||||
#define MATHLIB_H_INCLUDED
|
||||
|
||||
#include "vectypes.h"
|
||||
|
||||
// basic OpenCL functions
|
||||
float inversesqrt(float p);
|
||||
float acospi(float p);
|
||||
float sinpi(float p);
|
||||
float cospi(float p);
|
||||
|
||||
float nan(int p);
|
||||
|
||||
#if __cplusplus < 201103L
|
||||
float fmax(float p, float q);
|
||||
float fmin(float p, float q);
|
||||
#endif // C++11
|
||||
|
||||
float2 fmax(float2 p, float2 q);
|
||||
|
||||
float3 fmax(float3 p, float3 q);
|
||||
|
||||
float4 fmax(float4 p, float4 q);
|
||||
float2 fmin(float2 p, float2 q);
|
||||
float3 fmin(float3 p, float3 q);
|
||||
float4 fmin(float4 p, float4 q);
|
||||
|
||||
/*
|
||||
float dot( float2 p, float2 q );
|
||||
float dot( float3 p, float3 q );
|
||||
float dot( float4 p, float4 q );
|
||||
*/
|
||||
|
||||
// Dot product of two 2-component vectors; terms are added left to right.
static inline float dot(float2 p, float2 q)
{
	return (p.x * q.x)
	     + (p.y * q.y);
}
|
||||
// Dot product of two 3-component vectors; terms are added left to right.
static inline float dot(float3 p, float3 q)
{
	return (p.x * q.x)
	     + (p.y * q.y)
	     + (p.z * q.z);
}
|
||||
// Dot product of two 4-component vectors; terms are added left to right.
static inline float dot(float4 p, float4 q)
{
	return (p.x * q.x)
	     + (p.y * q.y)
	     + (p.z * q.z)
	     + (p.w * q.w);
}
|
||||
|
||||
|
||||
float3 cross(float3 p, float3 q);
|
||||
float4 cross(float4 p, float4 q);
|
||||
|
||||
float length(float2 p);
|
||||
float length(float3 p);
|
||||
float length(float4 p);
|
||||
|
||||
float length_sqr(float2 p);
|
||||
float length_sqr(float3 p);
|
||||
float length_sqr(float4 p);
|
||||
|
||||
float distance(float2 p, float2 q);
|
||||
float distance(float3 p, float3 q);
|
||||
float distance(float4 p, float4 q);
|
||||
|
||||
float distance_sqr(float2 p, float2 q);
|
||||
float distance_sqr(float3 p, float3 q);
|
||||
float distance_sqr(float4 p, float4 q);
|
||||
|
||||
float2 normalize(float2 p);
|
||||
float3 normalize(float3 p);
|
||||
float4 normalize(float4 p);
|
||||
|
||||
|
||||
|
||||
// functions other than just basic OpenCL functions
|
||||
|
||||
float4 gcross(float4 p, float4 q, float4 r);
|
||||
|
||||
// 2x2 float matrix stored as two float2 vectors, v[0] and v[1].
// operator*(mat2, mat2) computes result.v[i] = a.v[i].x * b.v[0] + a.v[i].y * b.v[1].
struct mat2
|
||||
{
|
||||
float2 v[2];
|
||||
};
|
||||
// 3x3 float matrix stored as three float3 vectors, v[0]..v[2].
// operator*(mat3, mat3) computes result.v[i] as b's vectors weighted by the components of a.v[i].
struct mat3
|
||||
{
|
||||
float3 v[3];
|
||||
};
|
||||
// 4x4 float matrix stored as four float4 vectors, v[0]..v[3].
// operator*(mat4, mat4) computes result.v[i] as b's vectors weighted by the components of a.v[i].
struct mat4
|
||||
{
|
||||
float4 v[4];
|
||||
};
|
||||
|
||||
float trace(mat2 p);
|
||||
float trace(mat3 p);
|
||||
float trace(mat4 p);
|
||||
|
||||
float determinant(mat2 p);
|
||||
float determinant(mat3 p);
|
||||
float determinant(mat4 p);
|
||||
|
||||
float2 characteristic_poly(mat2 p);
|
||||
float3 characteristic_poly(mat3 p);
|
||||
float4 characteristic_poly(mat4 p);
|
||||
|
||||
float2 solve_monic(float2 p);
|
||||
float3 solve_monic(float3 p);
|
||||
float4 solve_monic(float4 p);
|
||||
|
||||
float2 transform(mat2 p, float2 q);
|
||||
float3 transform(mat3 p, float3 q);
|
||||
float4 transform(mat4 p, float4 q);
|
||||
|
||||
mat2 adjugate(mat2 p);
|
||||
mat3 adjugate(mat3 p);
|
||||
mat4 adjugate(mat4 p);
|
||||
|
||||
mat2 invert(mat2 p);
|
||||
mat3 invert(mat3 p);
|
||||
mat4 invert(mat4 p);
|
||||
|
||||
float2 eigenvalues(mat2 p);
|
||||
float3 eigenvalues(mat3 p);
|
||||
float4 eigenvalues(mat4 p);
|
||||
|
||||
float2 eigenvector(mat2 p, float eigvl);
|
||||
float3 eigenvector(mat3 p, float eigvl);
|
||||
float4 eigenvector(mat4 p, float eigvl);
|
||||
|
||||
mat2 operator *(mat2 a, mat2 b);
|
||||
mat3 operator *(mat3 a, mat3 b);
|
||||
mat4 operator *(mat4 a, mat4 b);
|
||||
|
||||
|
||||
|
||||
// parametric line, 2D: The line is given by line = a + b*t.
|
||||
// Points on the line are a + b*t: 'a' is the point at t = 0 and 'b' is the direction.
// param_nearest_on_line() divides by dot(b, b), so b is not required to be unit length.
struct line2
|
||||
{
|
||||
float2 a;
|
||||
float2 b;
|
||||
};
|
||||
|
||||
// parametric line, 3D
|
||||
// Points on the line are a + b*t: 'a' is the point at t = 0 and 'b' is the direction.
// param_nearest_on_line() divides by dot(b, b), so b is not required to be unit length.
struct line3
|
||||
{
|
||||
float3 a;
|
||||
float3 b;
|
||||
};
|
||||
|
||||
// parametric line, 4D: points on the line are a + b*t ('a' is the point at t = 0, 'b' the direction).
// param_nearest_on_line() divides by dot(b, b), so b is not required to be unit length.
struct line4
|
||||
{
|
||||
float4 a;
|
||||
float4 b;
|
||||
};
|
||||
|
||||
// plane/hyperplane defined by a point and a normal vector
|
||||
// 3D plane: 'root_point' is one point on the plane, 'normal' its normal vector.
// generate_plane_from_points() fills 'normal' with a normalize()d cross product.
struct plane_3d
|
||||
{
|
||||
float3 root_point;
|
||||
float3 normal; // normalized
|
||||
};
|
||||
|
||||
// 4D hyperplane: 'root_point' is one point on the hyperplane, 'normal' its normal vector.
// generate_hyperplane_from_points() fills 'normal' with a normalize()d gcross() result.
struct hyperplane_4d
|
||||
{
|
||||
float4 root_point;
|
||||
float4 normal; // normalized
|
||||
};
|
||||
|
||||
float param_nearest_on_line(float2 point, line2 line);
|
||||
float param_nearest_on_line(float3 point, line3 line);
|
||||
float param_nearest_on_line(float4 point, line4 line);
|
||||
|
||||
float point_line_distance(float2 point, line2 line);
|
||||
float point_line_distance(float3 point, line3 line);
|
||||
float point_line_distance(float4 point, line4 line);
|
||||
|
||||
float point_line_distance_sqr(float2 point, line2 line);
|
||||
float point_line_distance_sqr(float3 point, line3 line);
|
||||
float point_line_distance_sqr(float4 point, line4 line);
|
||||
|
||||
float point_plane_3d_distance(float3 point, plane_3d plane);
|
||||
float point_hyperplane_4d_distance(float4 point, hyperplane_4d plane);
|
||||
|
||||
plane_3d generate_plane_from_points(float3 point0, float3 point1, float3 point2);
|
||||
hyperplane_4d generate_hyperplane_from_points(float4 point0, float4 point1, float4 point2, float4 point3);
|
||||
|
||||
|
||||
#endif
|
||||
1
3rdparty/astc/readme.txt
vendored
Normal file
1
3rdparty/astc/readme.txt
vendored
Normal file
@@ -0,0 +1 @@
|
||||
Library version of astc-encoder, from https://github.com/andrewwillmott/astc-encoder.
|
||||
398
3rdparty/astc/softfloat.cpp
vendored
Normal file
398
3rdparty/astc/softfloat.cpp
vendored
Normal file
@@ -0,0 +1,398 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Soft IEEE-754 floating point library.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#include "softfloat.h"

#include <string.h>
|
||||
|
||||
#define SOFTFLOAT_INLINE
|
||||
|
||||
/******************************************
|
||||
helper functions and their lookup tables
|
||||
******************************************/
|
||||
/* count leading zeros functions. Only used when the input is nonzero. */
|
||||
|
||||
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
|
||||
#elif defined(__arm__) && defined(__ARMCC_VERSION)
|
||||
#elif defined(__arm__) && defined(__GNUC__)
|
||||
#else
|
||||
/* table used for the slow default versions. */
|
||||
/* clz_table[b] is the number of leading zero bits in the 8-bit value b (8 for b == 0, 0 for b >= 128).
   Only compiled when none of the assembly/builtin variants of clz32() is selected. */
static const uint8_t clz_table[256] =
|
||||
{
|
||||
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
|
||||
/* Number of leading zero bits in 'inp'. Callers only pass nonzero values; the x86 variant
   returns 31 for an input of 0, the other variants return 32. */
SOFTFLOAT_INLINE uint32_t clz32(uint32_t inp)
{
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
	/* BSR gives the index of the highest set bit; OR-ing in bit 0 keeps its operand nonzero. */
	uint32_t highestBit;
	__asm__("bsrl %1, %0": "=r"(highestBit):"r"(inp | 1));
	return 31 - highestBit;
#elif defined(__arm__) && defined(__ARMCC_VERSION)
	return __clz(inp);			/* armcc builtin */
#elif defined(__arm__) && defined(__GNUC__)
	uint32_t zeroCount;
	__asm__("clz %0, %1": "=r"(zeroCount):"r"(inp));
	return zeroCount;
#else
	/* slow default version: narrow the value down to its top nonzero byte, then use the byte table. */
	uint32_t skipped = 24;
	if ((inp >> 16) != 0)
	{
		inp >>= 16;
		skipped -= 16;
	}
	if ((inp >> 8) != 0)
	{
		inp >>= 8;
		skipped -= 8;
	}
	return skipped + clz_table[inp];
#endif
}
|
||||
|
||||
/* Right-shift 'inp' by 'shamt' bits, rounding to nearest with ties going to the even result. */
static SOFTFLOAT_INLINE uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
{
	const uint32_t ulp = UINT32_C(1) << shamt;
	const uint32_t halfUlp = ulp >> 1;

	/* Bit 'shamt' of the input becomes the LSB of the result, so it tells whether the
	   truncated result is odd. The '| 1' forces "odd" when shamt is 0. */
	const uint32_t oddBit = (inp | UINT32_C(1)) & ulp;

	/* (oddBit - 1) wraps to all-ones when the result LSB is even, so its top bit is then 1. */
	const uint32_t evenAdjust = (oddBit - 1) >> 31;

	/* Add 0.5 ULP, minus one epsilon when even so that exact ties round down. */
	return (inp + halfUlp - evenAdjust) >> shamt;
}
|
||||
|
||||
/* Right-shift 'inp' by 'shamt' bits, rounding to nearest with ties going upward (away from zero). */
static SOFTFLOAT_INLINE uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
{
	/* Half of the weight of the lowest surviving bit (0 when shamt is 0). */
	const uint32_t halfUlp = (UINT32_C(1) << shamt) >> 1;
	return (inp + halfUlp) >> shamt;
}
|
||||
|
||||
|
||||
/* Right-shift 'inp' by 'shamt' bits, rounding up whenever any discarded bit is set. */
static SOFTFLOAT_INLINE uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
{
	/* Adding (2^shamt - 1) carries into the surviving bits exactly when a discarded bit is nonzero. */
	const uint32_t biased = inp + (UINT32_C(1) << shamt) - 1;
	return biased >> shamt;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* convert from FP16 to FP32. */
|
||||
/*
	Widen an FP16 bit pattern to the FP32 bit pattern of the same value.
	Infinities, zeros and denormals are converted exactly; NaNs are quietened.

	All arithmetic is done on uint32_t: the previous version left-shifted negative
	int32_t values (undefined behaviour before C++20) and built its table with
	INT32_C() applied to an expression containing '1 << 31'. The results are bit-identical.
*/
sf32 sf16_to_sf32(sf16 inp)
{
	const uint32_t inpx = inp;

	/*
		This table contains, for every FP16 sign/exponent value combination,
		the difference between the input FP16 value and the value obtained
		by shifting the correct FP32 result right by 13 bits.
		This table allows us to handle every case except denormals and NaN
		with just 1 table lookup, 2 shifts and 1 add.

		Entries for exponent 0 and exponent 31 additionally have bit 31 set
		(0x80000000) as a marker for "zero/denormal/infinity/NaN".
	*/
	static const uint32_t tbl[64] =
	{
		UINT32_C(0x80000000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000),
		UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000),
		UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000),
		UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x1C000), UINT32_C(0x80038000),
		UINT32_C(0x80038000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000),
		UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000),
		UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000),
		UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x54000), UINT32_C(0x80070000)
	};

	/* index = sign bit + 5 exponent bits */
	const uint32_t res = tbl[inpx >> 10] + inpx;

	/* the normal cases: the marker bit of 'res' is not set. */
	if ((res & UINT32_C(0x80000000)) == 0)
		return res << 13;

	/* Infinity and Zero: the bottom 10 bits of 'res' are clear. The shift discards the marker bit. */
	if ((res & UINT32_C(0x3FF)) == 0)
		return res << 13;

	/* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
	if ((inpx & 0x7C00) != 0)
		return (res << 13) | UINT32_C(0x400000);

	/* the remaining cases are Denormals: normalize the mantissa and derive the FP32 exponent
	   from the number of leading zeros. 'mskval' is nonzero here, as clz32() requires. */
	{
		const uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
		uint32_t mskval = inpx & UINT32_C(0x7FFF);
		const uint32_t leadingzeroes = clz32(mskval);
		mskval <<= leadingzeroes;
		return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
	}
}
|
||||
|
||||
/* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */
|
||||
|
||||
/*
	Narrow an FP32 bit pattern to FP16 using rounding mode 'rmode'.
	Denormal results are produced where needed and NaN inputs are quietened.

	The input is classified by its sign and exponent through 'tab'; the class plus the
	rounding mode (0 = UP, 1 = DOWN, 2 = TOZERO, 3 = NEARESTEVEN, 4 = NEARESTAWAY) gives a
	case number. Classes for positive inputs are 0, 10, 20, 30, 40, 50; the matching
	classes for negative inputs are 5 higher.
*/
sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
{
	/* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */
	static const uint8_t tab[512] = {
		0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
		10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
		20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
		30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,

		5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
		15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
		35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
	};

	/* Per-case magic constant, looked up before the switch so that cases sharing the same
	   code can be grouped, which keeps the code small. */
	static const uint32_t tabx[60] = {
		UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
		UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
		UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
		UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
		UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
		UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
		UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
		UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
		UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
	};

	/* 'inp >> 23' is the sign bit followed by the 8 exponent bits. */
	const uint32_t caseIdx = rmode + tab[inp >> 23];
	const uint32_t magic = tabx[caseIdx];

	switch (caseIdx)
	{
		/*
		   positive, exponent = 0, round-mode == UP: return 0 if the number really is 0,
		   else 1 (the smallest representable nonzero number).
		   Negating a nonzero input sets the MSB, so the shift yields 1 for nonzero and 0 for zero.
		 */
	case 0:
		return (uint32_t) (-(int32_t) inp) >> 31;

		/*
		   negative, exponent = 0, round-mode == DOWN: return 0x8000 (float -0.0) if the number
		   really is -0.0, else the smallest negative number (0x8001).
		   'magic' is 0x80000000 here; subtracting the input gives 0 for -0.0 and a value with
		   the MSB set otherwise, which the shift turns into 0 or 1.
		 */
	case 6:
		return ((magic - inp) >> 31) + UINT32_C(0x8000);

		/* every other underflow/overflow case needs no test: the table value is the result. */
	case 1:
	case 2:
	case 3:
	case 4:
	case 5:
	case 7:
	case 8:
	case 9:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 15:
	case 16:
	case 17:
	case 18:
	case 19:
	case 40:
	case 41:
	case 42:
	case 43:
	case 44:
	case 45:
	case 46:
	case 47:
	case 48:
	case 49:
		return magic;

		/*
		   Results that are FP16 denormals. These are not expected to be common. The mantissa
		   (including the implicit '1'!) is right-shifted by an amount that depends on the
		   exponent, applying the rounding mode during the shift. 'magic' supplies the sign
		   of the resulting denormal number.
		 */
	case 21:
	case 22:
	case 25:
	case 27:
		{
			/* denormal, round towards zero. */
			const uint32_t shift = 126 - ((inp >> 23) & 0xFF);
			const uint32_t significand = (inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000);
			return (significand >> shift) | magic;
		}
	case 20:
	case 26:
		{
			/* denormal, round away from zero. */
			const uint32_t shift = 126 - ((inp >> 23) & 0xFF);
			const uint32_t significand = (inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000);
			return rtup_shift32(significand, shift) | magic;
		}
	case 24:
	case 29:
		{
			/* denormal, round to nearest-away */
			const uint32_t shift = 126 - ((inp >> 23) & 0xFF);
			const uint32_t significand = (inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000);
			return rtna_shift32(significand, shift) | magic;
		}
	case 23:
	case 28:
		{
			/* denormal, round to nearest-even. */
			const uint32_t shift = 126 - ((inp >> 23) & 0xFF);
			const uint32_t significand = (inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000);
			return rtne_shift32(significand, shift) | magic;
		}

		/*
		   Normal numbers: 'magic' is the difference between the FP32 value and the FP16
		   representation of the same number left-shifted by 13 places, with a rounding
		   constant baked in: 2^13 - 1 for rounding away from zero, 2^12 for
		   round-to-nearest-away, 2^12 - 1 for round-to-nearest-even (which additionally
		   needs 1 added when the truncated result is odd).
		 */

		/* normal number, all rounding modes except round-to-nearest-even: */
	case 30:
	case 31:
	case 32:
	case 34:
	case 35:
	case 36:
	case 37:
	case 39:
		return (inp + magic) >> 13;

		/* normal number, round-to-nearest-even. */
	case 33:
	case 38:
		{
			const uint32_t resultLsb = (inp >> 13) & 1;
			return (inp + magic + resultLsb) >> 13;
		}

		/*
		   Infinity or NaN, either sign. For INF the input is 0x7F800000 or 0xFF800000;
		   subtracting 1 clears bit 23, while for any NaN bit 23 stays set. That bit is moved
		   to bit 9 of the result (the top mantissa bit) to quieten the NaN. Without this, a
		   NaN distinguished only by its low-order bits would be turned into an INF.
		 */
	case 50:
	case 51:
	case 52:
	case 53:
	case 54:
	case 55:
	case 56:
	case 57:
	case 58:
	case 59:
		{
			const uint32_t quietBit = ((inp - 1) & UINT32_C(0x800000)) >> 14;	/* zero if INF, nonzero if NaN. */
			return ((inp + magic) >> 13) | quietBit;
		}
	}

	return 0;
}
|
||||
|
||||
|
||||
|
||||
/* Overlays one 32-bit storage location as unsigned integer (u), signed integer (s) and float (f),
   so that a float's bit pattern can be handed to and from the sf32 routines. */
typedef union if32_
|
||||
{
|
||||
uint32_t u;
|
||||
int32_t s;
|
||||
float f;
|
||||
} if32;
|
||||
|
||||
/* convert from soft-float to native-float */
|
||||
|
||||
/* Converts an FP16 bit pattern to a native float holding the same value (via sf16_to_sf32). */
float sf16_to_float(sf16 p)
{
	const sf32 bits = sf16_to_sf32(p);
	float result;

	/* memcpy is the well-defined way to reinterpret the bits; reading a union member other
	   than the one last written is undefined behaviour in C++. */
	memcpy(&result, &bits, sizeof(result));
	return result;
}
|
||||
|
||||
/* convert from native-float to soft-float */
|
||||
|
||||
/* Converts a native float to an FP16 bit pattern using rounding mode 'rm' (via sf32_to_sf16). */
sf16 float_to_sf16(float p, roundmode rm)
{
	sf32 bits;

	/* memcpy is the well-defined way to obtain the float's bit pattern; reading a union
	   member other than the one last written is undefined behaviour in C++. */
	memcpy(&bits, &p, sizeof(bits));
	return sf32_to_sf16(bits, rm);
}
|
||||
95
3rdparty/astc/softfloat.h
vendored
Normal file
95
3rdparty/astc/softfloat.h
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This confidential and proprietary software may be used only as
|
||||
* authorised by a licensing agreement from ARM Limited
|
||||
* (C) COPYRIGHT 2011-2012 ARM Limited
|
||||
* ALL RIGHTS RESERVED
|
||||
*
|
||||
* The entire notice above must be reproduced on all authorised
|
||||
* copies and copies may only be made to the extent permitted
|
||||
* by a licensing agreement from ARM Limited.
|
||||
*
|
||||
* @brief Soft IEEE-754 floating point library.
|
||||
*/
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef SOFTFLOAT_H_INCLUDED
|
||||
|
||||
#define SOFTFLOAT_H_INCLUDED
|
||||
|
||||
#if defined __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#if defined __cplusplus && !defined(_MSC_VER)
|
||||
|
||||
/* if compiling as C++, we need to define these macros in order to obtain all the macros in stdint.h . */
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#define __STDC_CONSTANT_MACROS
|
||||
#include <stdint.h>
|
||||
|
||||
#else
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed char int8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef signed short int16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed int int32_t;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
uint32_t clz32(uint32_t p);
|
||||
|
||||
|
||||
/* targets that don't have UINT32_C probably don't have the rest of C99s stdint.h */
|
||||
#ifndef UINT32_C

/* Fallback integer-constant macros: append the literal suffix matching each width. */
#define PASTE(a) a
#define UINT64_C(a) PASTE(a##ULL)
#define UINT32_C(a) PASTE(a##U)
#define INT64_C(a) PASTE(a##LL)
#define INT32_C(a) a

/* Fallback printf conversion macros. */
#define PRIX32 "X"
#define PRId32 "d"
#define PRIu32 "u"

/* 64-bit constants above are 'long long' (LL / ULL suffixes), so the matching printf length
   modifier is "ll". The "L" modifier is only defined for long double conversions. */
#define PRIX64 "llX"
#define PRId64 "lld"
#define PRIu64 "llu"

#endif
|
||||
|
||||
/* sized soft-float types. These are mapped to the sized integer types of C99, instead of C's
|
||||
floating-point types; this is because the library needs to maintain exact, bit-level control on all
|
||||
operations on these data types. */
|
||||
typedef uint16_t sf16;
|
||||
typedef uint32_t sf32;
|
||||
|
||||
/* the five rounding modes that IEEE-754r defines */
|
||||
/* NOTE: the numeric values are part of the implementation. sf32_to_sf16() adds the mode to a
   table entry to select its case, so the values must stay 0..4 in this order. */
typedef enum
|
||||
{
|
||||
SF_UP = 0, /* round towards positive infinity */
|
||||
SF_DOWN = 1, /* round towards negative infinity */
|
||||
SF_TOZERO = 2, /* round towards zero */
|
||||
SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */
|
||||
SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */
|
||||
} roundmode;
|
||||
|
||||
/* narrowing float->float conversions */
|
||||
sf16 sf32_to_sf16(sf32, roundmode);
|
||||
|
||||
/* widening float->float conversions */
|
||||
sf32 sf16_to_sf32(sf16);
|
||||
|
||||
sf16 float_to_sf16(float, roundmode);
|
||||
float sf16_to_float(sf16);
|
||||
|
||||
|
||||
#if defined __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
16209
3rdparty/astc/vectypes.h
vendored
Normal file
16209
3rdparty/astc/vectypes.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,8 @@ project "bimg_encode"
|
||||
path.join(BIMG_DIR, "3rdparty/nvtt/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/pvrtc/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/pvrtc/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/astc/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/astc/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/tinyexr/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/iqa/include/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/iqa/source/**.c"),
|
||||
|
||||
@@ -9,27 +9,9 @@ project "texturec"
|
||||
includedirs {
|
||||
path.join(BX_DIR, "include"),
|
||||
path.join(BIMG_DIR, "include"),
|
||||
path.join(BIMG_DIR, "3rdparty"),
|
||||
path.join(BIMG_DIR, "3rdparty/nvtt"),
|
||||
path.join(BIMG_DIR, "3rdparty/iqa/include"),
|
||||
}
|
||||
|
||||
files {
|
||||
path.join(BIMG_DIR, "3rdparty/libsquish/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/libsquish/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/edtaa3/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/edtaa3/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/etc1/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/etc1/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/etc2/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/etc2/**.hpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/nvtt/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/nvtt/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/pvrtc/**.cpp"),
|
||||
path.join(BIMG_DIR, "3rdparty/pvrtc/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/tinyexr/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/iqa/include/**.h"),
|
||||
path.join(BIMG_DIR, "3rdparty/iqa/source/**.c"),
|
||||
path.join(BIMG_DIR, "tools/texturec/**.cpp"),
|
||||
path.join(BIMG_DIR, "tools/texturec/**.h"),
|
||||
}
|
||||
|
||||
@@ -19,6 +19,10 @@
|
||||
|
||||
BX_ERROR_RESULT(BIMG_ERROR, BX_MAKEFOURCC('b', 'i', 'm', 'g') );
|
||||
|
||||
#ifndef BIMG_CONFIG_ASTC_DECODE
|
||||
#define BIMG_CONFIG_ASTC_DECODE 0
|
||||
#endif
|
||||
|
||||
namespace bimg
|
||||
{
|
||||
struct Memory
|
||||
|
||||
@@ -3,9 +3,15 @@
|
||||
* License: https://github.com/bkaradzic/bimg#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#define BIMG_CONFIG_ASTC_DECODE 1
|
||||
|
||||
#include "bimg_p.h"
|
||||
#include <bx/hash.h>
|
||||
|
||||
#if BIMG_CONFIG_ASTC_DECODE
|
||||
#include "../3rdparty/astc/astc_lib.h"
|
||||
#endif
|
||||
|
||||
namespace bimg
|
||||
{
|
||||
static const ImageBlockInfo s_imageBlockInfo[] =
|
||||
@@ -4476,8 +4482,24 @@ namespace bimg
|
||||
case TextureFormat::ASTC8x5:
|
||||
case TextureFormat::ASTC8x6:
|
||||
case TextureFormat::ASTC10x5:
|
||||
# if BIMG_CONFIG_ASTC_DECODE
|
||||
astc_decompress
|
||||
(
|
||||
(const uint8_t*) _src,
|
||||
s_imageBlockInfo[_srcFormat].blockWidth,
|
||||
s_imageBlockInfo[_srcFormat].blockHeight,
|
||||
ASTC_DECODE_LDR_LINEAR,
|
||||
|
||||
_width,
|
||||
_height,
|
||||
(uint8_t*) _dst,
|
||||
ASTC_BGRA,
|
||||
_dstPitch
|
||||
);
|
||||
# else
|
||||
BX_WARN(false, "ASTC decoder is not implemented.");
|
||||
imageCheckerboard(_dst, _width, _height, 16, UINT32_C(0xff000000), UINT32_C(0xffffff00) );
|
||||
# endif
|
||||
break;
|
||||
|
||||
case TextureFormat::RGBA8:
|
||||
@@ -5181,6 +5203,7 @@ namespace bimg
|
||||
|
||||
uint32_t ddspf = UINT32_MAX;
|
||||
uint32_t dxgiFormat = UINT32_MAX;
|
||||
uint32_t fourccFormat = UINT32_MAX;
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDdsPixelFormat); ++ii)
|
||||
{
|
||||
@@ -5201,12 +5224,24 @@ namespace bimg
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (UINT32_MAX == dxgiFormat)
|
||||
{
|
||||
BX_ERROR_SET(_err, BIMG_ERROR, "DDS: DXGI format not supported.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (UINT32_MAX == ddspf && UINT32_MAX == dxgiFormat)
|
||||
{
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(s_translateDdsFourccFormat); ++ii)
|
||||
{
|
||||
if (s_translateDdsFourccFormat[ii].m_textureFormat == _format)
|
||||
{
|
||||
fourccFormat = s_translateDdsFourccFormat[ii].m_format;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (UINT32_MAX == ddspf && UINT32_MAX == dxgiFormat && UINT32_MAX == fourccFormat)
|
||||
{
|
||||
BX_ERROR_SET(_err, BIMG_ERROR, "DDS: output format not supported.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
const uint32_t bpp = getBitsPerPixel(_format);
|
||||
@@ -5254,7 +5289,12 @@ namespace bimg
|
||||
{
|
||||
total += bx::write(_writer, uint32_t(8*sizeof(uint32_t) ), _err); // pixelFormatSize
|
||||
total += bx::write(_writer, uint32_t(DDPF_FOURCC), _err);
|
||||
|
||||
if (UINT32_MAX != fourccFormat)
|
||||
total += bx::write(_writer, fourccFormat, _err);
|
||||
else
|
||||
total += bx::write(_writer, uint32_t(DDS_DX10), _err);
|
||||
|
||||
total += bx::write(_writer, uint32_t(0), _err); // bitCount
|
||||
total += bx::writeRep(_writer, 0, 4*sizeof(uint32_t), _err); // bitmask
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <nvtt/nvtt.h>
|
||||
#include <pvrtc/PvrTcEncoder.h>
|
||||
#include <edtaa3/edtaa3func.h>
|
||||
#include <astc/astc_lib.h>
|
||||
|
||||
BX_PRAGMA_DIAGNOSTIC_PUSH();
|
||||
BX_PRAGMA_DIAGNOSTIC_IGNORED_MSVC(4100) // warning C4100: 'alloc_context': unreferenced formal parameter
|
||||
@@ -35,6 +36,14 @@ namespace bimg
|
||||
};
|
||||
BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_squishQuality) );
|
||||
|
||||
// Lookup table translating a bimg Quality::Enum value (used as the array index)
// into the compression mode passed to the ASTC encoder.
// Entry order must follow the Quality enum; the entry count is checked against
// Quality::Count by the static assert that follows the table.
static const ASTC_COMPRESS_MODE s_astcQuality[] =
|
||||
{
|
||||
ASTC_COMPRESS_MEDIUM, // Default
|
||||
ASTC_COMPRESS_THOROUGH, // Highest
|
||||
ASTC_COMPRESS_FAST, // Fastest
|
||||
};
|
||||
BX_STATIC_ASSERT(Quality::Count == BX_COUNTOF(s_astcQuality));
|
||||
|
||||
void imageEncodeFromRgba8(bx::AllocatorI* _allocator, void* _dst, const void* _src, uint32_t _width, uint32_t _height, uint32_t _depth, TextureFormat::Enum _format, Quality::Enum _quality, bx::Error* _err)
|
||||
{
|
||||
const uint8_t* src = (const uint8_t*)_src;
|
||||
@@ -122,6 +131,22 @@ namespace bimg
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::ASTC4x4:
|
||||
case TextureFormat::ASTC5x5:
|
||||
case TextureFormat::ASTC6x6:
|
||||
case TextureFormat::ASTC8x5:
|
||||
case TextureFormat::ASTC8x6:
|
||||
case TextureFormat::ASTC10x5:
|
||||
{
|
||||
const bimg::ImageBlockInfo& astcBlockInfo = bimg::getBlockInfo(_format);
|
||||
|
||||
ASTC_COMPRESS_MODE compress_mode = s_astcQuality[_quality];
|
||||
ASTC_DECODE_MODE decode_mode = ASTC_DECODE_LDR_LINEAR;
|
||||
|
||||
astc_compress(_width, _height, src, ASTC_RGBA, srcPitch, astcBlockInfo.blockWidth, astcBlockInfo.blockHeight, compress_mode, decode_mode, dst);
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::BGRA8:
|
||||
imageSwizzleBgra8(dst, dstPitch, _width, _height, src, srcPitch);
|
||||
break;
|
||||
@@ -200,15 +225,21 @@ namespace bimg
|
||||
{
|
||||
switch (_dstFormat)
|
||||
{
|
||||
case bimg::TextureFormat::BC1:
|
||||
case bimg::TextureFormat::BC2:
|
||||
case bimg::TextureFormat::BC3:
|
||||
case bimg::TextureFormat::BC4:
|
||||
case bimg::TextureFormat::BC5:
|
||||
case bimg::TextureFormat::ETC1:
|
||||
case bimg::TextureFormat::ETC2:
|
||||
case bimg::TextureFormat::PTC14:
|
||||
case bimg::TextureFormat::PTC14A:
|
||||
case TextureFormat::BC1:
|
||||
case TextureFormat::BC2:
|
||||
case TextureFormat::BC3:
|
||||
case TextureFormat::BC4:
|
||||
case TextureFormat::BC5:
|
||||
case TextureFormat::ETC1:
|
||||
case TextureFormat::ETC2:
|
||||
case TextureFormat::PTC14:
|
||||
case TextureFormat::PTC14A:
|
||||
case TextureFormat::ASTC4x4:
|
||||
case TextureFormat::ASTC5x5:
|
||||
case TextureFormat::ASTC6x6:
|
||||
case TextureFormat::ASTC8x5:
|
||||
case TextureFormat::ASTC8x6:
|
||||
case TextureFormat::ASTC10x5:
|
||||
{
|
||||
uint8_t* temp = (uint8_t*)BX_ALLOC(_allocator, _width*_height*_depth*4);
|
||||
imageDecodeToRgba8(_allocator, temp, _src, _width, _height, _width*4, _srcFormat);
|
||||
|
||||
@@ -842,10 +842,11 @@ void help(const char* _error = NULL, bool _showHelp = true)
|
||||
" aspect ratio will be preserved.\n"
|
||||
" --radiance <model> Radiance cubemap filter. (Lighting model: Phong, PhongBrdf, Blinn, BlinnBrdf, GGX)\n"
|
||||
" --as <extension> Save as.\n"
|
||||
" --formats List all supported formats.\n"
|
||||
" --validate *DEBUG* Validate that output image produced matches after loading.\n"
|
||||
|
||||
"\n"
|
||||
"For additional information, see https://github.com/bkaradzic/bgfx\n"
|
||||
"For additional information, see https://github.com/bkaradzic/bimg\n"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -909,6 +910,24 @@ int main(int _argc, const char* _argv[])
|
||||
return bx::kExitFailure;
|
||||
}
|
||||
|
||||
if (cmdLine.hasArg("formats"))
|
||||
{
|
||||
printf("Uncompressed formats:\n");
|
||||
|
||||
for (int format = bimg::TextureFormat::Unknown + 1; format < bimg::TextureFormat::UnknownDepth; format++)
|
||||
printf(" %s\n", bimg::getName((bimg::TextureFormat::Enum) format));
|
||||
|
||||
for (int format = bimg::TextureFormat::UnknownDepth + 1; format < bimg::TextureFormat::Count; format++)
|
||||
printf(" %s\n", bimg::getName((bimg::TextureFormat::Enum) format));
|
||||
|
||||
printf("Compressed formats:\n");
|
||||
|
||||
for (int format = 0; format < bimg::TextureFormat::Unknown; format++)
|
||||
printf(" %s\n", bimg::getName((bimg::TextureFormat::Enum) format));
|
||||
|
||||
return bx::kExitSuccess;
|
||||
}
|
||||
|
||||
const char* inputFileName = cmdLine.findOption('f');
|
||||
if (NULL == inputFileName)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user