Moved image code from bgfx into bimg library.

Branimir Karadžić
2017-04-02 19:26:02 -07:00
parent 369198a1d4
commit f475cf0623
151 changed files with 70010 additions and 22 deletions

34
3rdparty/edtaa3/LICENSE.md vendored Normal file

@@ -0,0 +1,34 @@
https://github.com/OpenGLInsights/OpenGLInsightsCode/blob/master/Chapter%2012%202D%20Shape%20Rendering%20by%20Distance%20Fields/LICENSE.txt
The C code and the GLSL code for the OpenGL demo is public
domain code. The distance transform code in the console
application to create distance field textures, located in
the file "edtaa3func.c", is MIT licensed, and free to use
under the following conditions.
https://github.com/OpenGLInsights/OpenGLInsightsCode/issues/6#issuecomment-67829157
----
Copyright (C) 2011 by Stefan Gustavson
(stefan.gustavson@liu.se)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
----

580
3rdparty/edtaa3/edtaa3func.cpp vendored Normal file

@@ -0,0 +1,580 @@
/*
* edtaa3()
*
* Sweep-and-update Euclidean distance transform of an
* image. Positive pixels are treated as object pixels,
* zero or negative pixels are treated as background.
* An attempt is made to treat antialiased edges correctly.
* The input image must have pixels in the range [0,1],
* and the antialiased image should be a box-filter
* sampling of the ideal, crisp edge.
* If the antialias region is more than 1 pixel wide,
* the result from this transform will be inaccurate.
*
* By Stefan Gustavson (stefan.gustavson@gmail.com).
*
* Originally written in 1994, based on a verbal
* description of Per-Erik Danielsson's SSED8 algorithm
* as presented in the PhD dissertation of Ingemar
* Ragnemalm. This is Per-Erik Danielsson's scanline
* scheme from 1979 - I only implemented it in C.
*
* Updated in 2004 to treat border pixels correctly,
* and cleaned up the code to improve readability.
*
* Updated in 2009 to handle anti-aliased edges,
* as published in the article "Anti-aliased Euclidean
* distance transform" by Stefan Gustavson and Robin Strand,
* Pattern Recognition Letters 32 (2011) 252–257.
*
* Updated in 2011 to avoid a corner case causing an
* infinite loop for some input data.
*
*/
/*
Copyright (C) 2011 by Stefan Gustavson
(stefan.gustavson@liu.se)
This code is distributed under the permissive "MIT license":
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <math.h>
/*
* Compute the local gradient at edge pixels using convolution filters.
* The gradient is computed only at edge pixels. At other places in the
* image, it is never used, and it's mostly zero anyway.
*/
void computegradient(double *img, int w, int h, double *gx, double *gy)
{
int i,j,k;
double glength;
#define SQRT2 1.4142136
for(i = 1; i < h-1; i++) { // Avoid edges where the kernels would spill over
for(j = 1; j < w-1; j++) {
k = i*w + j;
if((img[k]>0.0) && (img[k]<1.0)) { // Compute gradient for edge pixels only
gx[k] = -img[k-w-1] - SQRT2*img[k-1] - img[k+w-1] + img[k-w+1] + SQRT2*img[k+1] + img[k+w+1];
gy[k] = -img[k-w-1] - SQRT2*img[k-w] - img[k-w+1] + img[k+w-1] + SQRT2*img[k+w] + img[k+w+1];
glength = gx[k]*gx[k] + gy[k]*gy[k];
if(glength > 0.0) { // Avoid division by zero
glength = sqrt(glength);
gx[k]=gx[k]/glength;
gy[k]=gy[k]/glength;
}
}
}
}
// TODO: Compute reasonable values for gx, gy also around the image edges.
// (These are zero now, which reduces the accuracy for a 1-pixel wide region
// around the image edge.) 2x2 kernels would be suitable for this.
}
/*
* A somewhat tricky function to approximate the distance to an edge in a
* certain pixel, with consideration to either the local gradient (gx,gy)
* or the direction to the pixel (dx,dy) and the pixel greyscale value a.
* The latter alternative, using (dx,dy), is the metric used by edtaa2().
* Using a local estimate of the edge gradient (gx,gy) yields much better
* accuracy at and near edges, and reduces the error even at distant pixels
* provided that the gradient direction is accurately estimated.
*/
double edgedf(double gx, double gy, double a)
{
double df, glength, temp, a1;
if ((gx == 0) || (gy == 0)) { // Either A) gx or gy is zero, or B) both
df = 0.5-a; // Linear approximation is A) correct or B) a fair guess
} else {
glength = sqrt(gx*gx + gy*gy);
if(glength>0) {
gx = gx/glength;
gy = gy/glength;
}
/* Everything is symmetric wrt sign and transposition,
* so move to first octant (gx>=0, gy>=0, gx>=gy) to
* avoid handling all possible edge directions.
*/
gx = fabs(gx);
gy = fabs(gy);
if(gx<gy) {
temp = gx;
gx = gy;
gy = temp;
}
a1 = 0.5*gy/gx;
if (a < a1) { // 0 <= a < a1
df = 0.5*(gx + gy) - sqrt(2.0*gx*gy*a);
} else if (a < (1.0-a1)) { // a1 <= a <= 1-a1
df = (0.5-a)*gx;
} else { // 1-a1 < a <= 1
df = -0.5*(gx + gy) + sqrt(2.0*gx*gy*(1.0-a));
}
}
return df;
}
double distaa3(double *img, double *gximg, double *gyimg, int w, int c, int xc, int yc, int xi, int yi)
{
double di, df, dx, dy, gx, gy, a;
int closest;
closest = c-xc-yc*w; // Index to the edge pixel pointed to from c
a = img[closest]; // Grayscale value at the edge pixel
gx = gximg[closest]; // X gradient component at the edge pixel
gy = gyimg[closest]; // Y gradient component at the edge pixel
if(a > 1.0) a = 1.0;
if(a < 0.0) a = 0.0; // Clip grayscale values outside the range [0,1]
if(a == 0.0) return 1000000.0; // Not an object pixel, return "very far" ("don't know yet")
dx = (double)xi;
dy = (double)yi;
di = sqrt(dx*dx + dy*dy); // Length of integer vector, like a traditional EDT
if(di==0) { // Use local gradient only at edges
// Estimate based on local gradient only
df = edgedf(gx, gy, a);
} else {
// Estimate gradient based on direction to edge (accurate for large di)
df = edgedf(dx, dy, a);
}
return di + df; // Same metric as edtaa2, except at edges (where di=0)
}
// Shorthand macro: add ubiquitous parameters img, gx, gy and w and call distaa3()
#define DISTAA(c,xc,yc,xi,yi) (distaa3(img, gx, gy, w, c, xc, yc, xi, yi))
void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist)
{
int x, y, i, c;
int offset_u, offset_ur, offset_r, offset_rd,
offset_d, offset_dl, offset_l, offset_lu;
double olddist, newdist;
int cdistx, cdisty, newdistx, newdisty;
int changed;
double epsilon = 1e-3; // Safeguard against errors due to limited precision
/* Initialize index offsets for the current image width */
offset_u = -w;
offset_ur = -w+1;
offset_r = 1;
offset_rd = w+1;
offset_d = w;
offset_dl = w-1;
offset_l = -1;
offset_lu = -w-1;
/* Initialize the distance images */
for(i=0; i<w*h; i++) {
distx[i] = 0; // At first, all pixels point to
disty[i] = 0; // themselves as the closest known.
if(img[i] <= 0.0)
{
dist[i]= 1000000.0; // Big value, means "not set yet"
}
else if (img[i]<1.0) {
dist[i] = edgedf(gx[i], gy[i], img[i]); // Gradient-assisted estimate
}
else {
dist[i]= 0.0; // Inside the object
}
}
/* Perform the transformation */
do
{
changed = 0;
/* Scan rows, except first row */
for(y=1; y<h; y++)
{
/* move index to leftmost pixel of current row */
i = y*w;
/* scan right, propagate distances from above & left */
/* Leftmost pixel is special, has no left neighbors */
olddist = dist[i];
if(olddist > 0) // If non-zero distance or not set yet
{
c = i + offset_u; // Index of candidate for testing
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_ur;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
i++;
/* Middle pixels have all neighbors */
for(x=1; x<w-1; x++, i++)
{
olddist = dist[i];
if(olddist <= 0) continue; // No need to update further
c = i+offset_l;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_lu;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_u;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_ur;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
/* Rightmost pixel of row is special, has no right neighbors */
olddist = dist[i];
if(olddist > 0) // If not already zero distance
{
c = i+offset_l;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_lu;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_u;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx;
newdisty = cdisty+1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
/* Move index to second rightmost pixel of current row. */
/* Rightmost pixel is skipped, it has no right neighbor. */
i = y*w + w-2;
/* scan left, propagate distance from right */
for(x=w-2; x>=0; x--, i--)
{
olddist = dist[i];
if(olddist <= 0) continue; // Already zero distance
c = i+offset_r;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
}
/* Scan rows in reverse order, except last row */
for(y=h-2; y>=0; y--)
{
/* move index to rightmost pixel of current row */
i = y*w + w-1;
/* Scan left, propagate distances from below & right */
/* Rightmost pixel is special, has no right neighbors */
olddist = dist[i];
if(olddist > 0) // If not already zero distance
{
c = i+offset_d;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_dl;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
i--;
/* Middle pixels have all neighbors */
for(x=w-2; x>0; x--, i--)
{
olddist = dist[i];
if(olddist <= 0) continue; // Already zero distance
c = i+offset_r;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_rd;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_d;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_dl;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
/* Leftmost pixel is special, has no left neighbors */
olddist = dist[i];
if(olddist > 0) // If not already zero distance
{
c = i+offset_r;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_rd;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx-1;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
olddist=newdist;
changed = 1;
}
c = i+offset_d;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx;
newdisty = cdisty-1;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
/* Move index to second leftmost pixel of current row. */
/* Leftmost pixel is skipped, it has no left neighbor. */
i = y*w + 1;
for(x=1; x<w; x++, i++)
{
/* scan right, propagate distance from left */
olddist = dist[i];
if(olddist <= 0) continue; // Already zero distance
c = i+offset_l;
cdistx = distx[c];
cdisty = disty[c];
newdistx = cdistx+1;
newdisty = cdisty;
newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
if(newdist < olddist-epsilon)
{
distx[i]=newdistx;
disty[i]=newdisty;
dist[i]=newdist;
changed = 1;
}
}
}
}
while(changed); // Sweep until no more updates are made
/* The transformation is completed. */
}
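/*
 * A minimal usage sketch for the two functions above; the helper name,
 * the buffer handling, and the outside-only clamp are illustrative
 * assumptions, not part of this file.
 */
#include <stdlib.h>
/* Build an (outside) distance field for a w x h antialiased image "img"
 * with values in [0,1]; "dist" receives one double per pixel. */
void make_distance_field(double *img, int w, int h, double *dist)
{
    /* calloc, not malloc: computegradient only writes interior edge pixels. */
    double *gx = (double*)calloc(w*h, sizeof(double));
    double *gy = (double*)calloc(w*h, sizeof(double));
    short *distx = (short*)malloc(w*h*sizeof(short));
    short *disty = (short*)malloc(w*h*sizeof(short));
    computegradient(img, w, h, gx, gy);
    edtaa3(img, gx, gy, w, h, distx, disty, dist);
    /* The epsilon safeguard can leave tiny negative distances; clamp them. */
    for(int i=0; i<w*h; i++) {
        if(dist[i] < 0.0) dist[i] = 0.0;
    }
    free(gx); free(gy); free(distx); free(disty);
}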

7
3rdparty/edtaa3/edtaa3func.h vendored Normal file

@@ -0,0 +1,7 @@
#ifndef EDTAA3_H_HEADER_GUARD
#define EDTAA3_H_HEADER_GUARD
extern void computegradient(double *img, int w, int h, double *gx, double *gy);
extern void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist);
#endif // EDTAA3_H_HEADER_GUARD

161
3rdparty/etc1/LICENSE vendored Normal file

@@ -0,0 +1,161 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the
copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other
entities that control, are controlled by, or are under common control with
that entity. For the purposes of this definition, "control" means (i) the
power, direct or indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (ii) ownership of fifty
percent (50%) or more of the outstanding shares, or (iii) beneficial
ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation
or translation of a Source form, including but not limited to compiled
object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object
form, made available under the License, as indicated by a copyright
notice that is included in or attached to the work (an example is
provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original
version of the Work and any modifications or additions to that Work or
Derivative Works thereof, that is intentionally submitted to Licensor
for inclusion in the Work by the copyright owner or by an individual or
Legal Entity authorized to submit on behalf of the copyright owner. For
the purposes of this definition, "submitted" means any form of electronic,
verbal, or written communication sent to the Licensor or its
representatives, including but not limited to communication on electronic
mailing lists, source code control systems, and issue tracking systems that
are managed by, or on behalf of, the Licensor for the purpose of discussing
and improving the Work, but excluding communication that is conspicuously
marked or otherwise designated in writing by the copyright owner as "Not
a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on
behalf of whom a Contribution has been received by Licensor and subsequently
incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of this
License, each Contributor hereby grants to You a perpetual, worldwide,
non-exclusive, no-charge, royalty-free, irrevocable copyright license to
reproduce, prepare Derivative Works of, publicly display, publicly perform,
sublicense, and distribute the Work and such Derivative Works in Source or
Object form.
3. Grant of Patent License. Subject to the terms and conditions of this
License, each Contributor hereby grants to You a perpetual, worldwide,
non-exclusive, no-charge, royalty-free, irrevocable (except as stated in
this section) patent license to make, have made, use, offer to sell, sell,
import, and otherwise transfer the Work, where such license applies only to
those patent claims licensable by such Contributor that are necessarily
infringed by their Contribution(s) alone or by combination of their
Contribution(s) with the Work to which such Contribution(s) was submitted.
If You institute patent litigation against any entity (including a cross-claim
or counterclaim in a lawsuit) alleging that the Work or a Contribution
incorporated within the Work constitutes direct or contributory patent
infringement, then any patent licenses granted to You under this License
for that Work shall terminate as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or
Derivative Works thereof in any medium, with or without modifications, and
in Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy of
this License; and
You must cause any modified files to carry prominent notices stating that
You changed the files; and
You must retain, in the Source form of any Derivative Works that You
distribute, all copyright, patent, trademark, and attribution notices
from the Source form of the Work, excluding those notices that do not
pertain to any part of the Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution,
then any Derivative Works that You distribute must include a readable
copy of the attribution notices contained within such NOTICE file, excluding
those notices that do not pertain to any part of the Derivative Works, in
at least one of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or documentation, if
provided along with the Derivative Works; or, within a display generated by
the Derivative Works, if and wherever such third-party notices normally
appear. The contents of the NOTICE file are for informational purposes
only and do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside or as
an addendum to the NOTICE text from the Work, provided that such additional
attribution notices cannot be construed as modifying the License.
You may add Your own copyright statement to Your modifications and may provide
additional or different license terms and conditions for use, reproduction, or
distribution of Your modifications, or for any such Derivative Works as a
whole, provided Your use, reproduction, and distribution of the Work otherwise
complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any
Contribution intentionally submitted for inclusion in the Work by You to the
Licensor shall be under the terms and conditions of this License, without any
additional terms or conditions. Notwithstanding the above, nothing herein
shall supersede or modify the terms of any separate license agreement you
may have executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names,
trademarks, service marks, or product names of the Licensor, except as
required for reasonable and customary use in describing the origin of the
Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to
in writing, Licensor provides the Work (and each Contributor provides its
Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
ANY KIND, either express or implied, including, without limitation, any
warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or
FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining
the appropriateness of using or redistributing the Work and assume any risks
associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in
tort (including negligence), contract, or otherwise, unless required by
applicable law (such as deliberate and grossly negligent acts) or agreed to
in writing, shall any Contributor be liable to You for damages, including
any direct, indirect, special, incidental, or consequential damages of any
character arising as a result of this License or out of the use or inability
to use the Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all other
commercial damages or losses), even if such Contributor has been advised
of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the
Work or Derivative Works thereof, You may choose to offer, and charge a
fee for, acceptance of support, warranty, indemnity, or other liability
obligations and/or rights consistent with this License. However, in accepting
such obligations, You may act only on Your own behalf and on Your sole
responsibility, not on behalf of any other Contributor, and only if You
agree to indemnify, defend, and hold each Contributor harmless for any
liability incurred by, or claims asserted against, such Contributor by
reason of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

686
3rdparty/etc1/etc1.cpp vendored Normal file

@@ -0,0 +1,686 @@
// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////
// This is a fork of the AOSP project ETC1 codec. The original code can be found
// at the following web site:
// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/include/ETC1/
//////////////////////////////////////////////////////////////////////////////////////////
#include "etc1.h"
#include <cstring>
/* From http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
The number of bits that represent a 4x4 texel block is 64 bits if
<internalformat> is given by ETC1_RGB8_OES.
The data for a block is a number of bytes,
{q0, q1, q2, q3, q4, q5, q6, q7}
where byte q0 is located at the lowest memory address and q7 at
the highest. The 64 bits specifying the block is then represented
by the following 64 bit integer:
int64bit = 256*(256*(256*(256*(256*(256*(256*q0+q1)+q2)+q3)+q4)+q5)+q6)+q7;
ETC1_RGB8_OES:
a) bit layout in bits 63 through 32 if diffbit = 0
63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
-----------------------------------------------
| base col1 | base col2 | base col1 | base col2 |
| R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)|
-----------------------------------------------
47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
---------------------------------------------------
| base col1 | base col2 | table | table |diff|flip|
| B1 (4bits)| B2 (4bits)| cw 1 | cw 2 |bit |bit |
---------------------------------------------------
b) bit layout in bits 63 through 32 if diffbit = 1
63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
-----------------------------------------------
| base col1 | dcol 2 | base col1 | dcol 2 |
| R1' (5 bits) | dR2 | G1' (5 bits) | dG2 |
-----------------------------------------------
47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32
---------------------------------------------------
| base col 1 | dcol 2 | table | table |diff|flip|
| B1' (5 bits) | dB2 | cw 1 | cw 2 |bit |bit |
---------------------------------------------------
c) bit layout in bits 31 through 0 (in both cases)
31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
-----------------------------------------------
| most significant pixel index bits |
| p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a|
-----------------------------------------------
15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
--------------------------------------------------
| least significant pixel index bits |
| p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
--------------------------------------------------
Add table 3.17.2: Intensity modifier sets for ETC1 compressed textures:
table codeword modifier table
------------------ ----------------------
0 -8 -2 2 8
1 -17 -5 5 17
2 -29 -9 9 29
3 -42 -13 13 42
4 -60 -18 18 60
5 -80 -24 24 80
6 -106 -33 33 106
7 -183 -47 47 183
Add table 3.17.3 Mapping from pixel index values to modifier values for
ETC1 compressed textures:
pixel index value
---------------
msb lsb resulting modifier value
----- ----- -------------------------
1 1 -b (large negative value)
1 0 -a (small negative value)
0 0 a (small positive value)
0 1 b (large positive value)
*/
static const int kModifierTable[] = {
/* 0 */2, 8, -2, -8,
/* 1 */5, 17, -5, -17,
/* 2 */9, 29, -9, -29,
/* 3 */13, 42, -13, -42,
/* 4 */18, 60, -18, -60,
/* 5 */24, 80, -24, -80,
/* 6 */33, 106, -33, -106,
/* 7 */47, 183, -47, -183 };
static const int kLookup[8] = { 0, 1, 2, 3, -4, -3, -2, -1 };
static inline etc1_byte clamp(int x) {
return (etc1_byte) (x >= 0 ? (x < 255 ? x : 255) : 0);
}
static
inline int convert4To8(int b) {
int c = b & 0xf;
return (c << 4) | c;
}
static
inline int convert5To8(int b) {
int c = b & 0x1f;
return (c << 3) | (c >> 2);
}
static
inline int convert6To8(int b) {
int c = b & 0x3f;
return (c << 2) | (c >> 4);
}
static
inline int divideBy255(int d) {
return (d + 128 + (d >> 8)) >> 8;
}
static
inline int convert8To4(int b) {
int c = b & 0xff;
return divideBy255(c * 15);
}
static
inline int convert8To5(int b) {
int c = b & 0xff;
return divideBy255(c * 31);
}
static
inline int convertDiff(int base, int diff) {
return convert5To8((0x1f & base) + kLookup[0x7 & diff]);
}
static
void decode_subblock(etc1_byte* pOut, int r, int g, int b, const int* table,
etc1_uint32 low, bool second, bool flipped) {
int baseX = 0;
int baseY = 0;
if (second) {
if (flipped) {
baseY = 2;
} else {
baseX = 2;
}
}
for (int i = 0; i < 8; i++) {
int x, y;
if (flipped) {
x = baseX + (i >> 1);
y = baseY + (i & 1);
} else {
x = baseX + (i >> 2);
y = baseY + (i & 3);
}
int k = y + (x * 4);
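// Selector bits for pixel k: the lsb sits in the low 16 bits of 'low',
// the msb 16 bits higher (see table 3.17.3 in the spec comment above).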
int offset = ((low >> k) & 1) | ((low >> (k + 15)) & 2);
int delta = table[offset];
etc1_byte* q = pOut + 3 * (x + 4 * y);
*q++ = clamp(r + delta);
*q++ = clamp(g + delta);
*q++ = clamp(b + delta);
}
}
// Input is an ETC1 compressed version of the data.
// Output is a 4 x 4 square of 3-byte pixels in form R, G, B
void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut) {
etc1_uint32 high = (pIn[0] << 24) | (pIn[1] << 16) | (pIn[2] << 8) | pIn[3];
etc1_uint32 low = (pIn[4] << 24) | (pIn[5] << 16) | (pIn[6] << 8) | pIn[7];
int r1, r2, g1, g2, b1, b2;
if (high & 2) {
// differential
int rBase = high >> 27;
int gBase = high >> 19;
int bBase = high >> 11;
r1 = convert5To8(rBase);
r2 = convertDiff(rBase, high >> 24);
g1 = convert5To8(gBase);
g2 = convertDiff(gBase, high >> 16);
b1 = convert5To8(bBase);
b2 = convertDiff(bBase, high >> 8);
} else {
// not differential
r1 = convert4To8(high >> 28);
r2 = convert4To8(high >> 24);
g1 = convert4To8(high >> 20);
g2 = convert4To8(high >> 16);
b1 = convert4To8(high >> 12);
b2 = convert4To8(high >> 8);
}
int tableIndexA = 7 & (high >> 5);
int tableIndexB = 7 & (high >> 2);
const int* tableA = kModifierTable + tableIndexA * 4;
const int* tableB = kModifierTable + tableIndexB * 4;
bool flipped = (high & 1) != 0;
decode_subblock(pOut, r1, g1, b1, tableA, low, false, flipped);
decode_subblock(pOut, r2, g2, b2, tableB, low, true, flipped);
}
typedef struct {
etc1_uint32 high;
etc1_uint32 low;
etc1_uint32 score; // Lower is more accurate
} etc_compressed;
static
inline void take_best(etc_compressed* a, const etc_compressed* b) {
if (a->score > b->score) {
*a = *b;
}
}
static
void etc_average_colors_subblock(const etc1_byte* pIn, etc1_uint32 inMask,
etc1_byte* pColors, bool flipped, bool second) {
int r = 0;
int g = 0;
int b = 0;
if (flipped) {
int by = 0;
if (second) {
by = 2;
}
for (int y = 0; y < 2; y++) {
int yy = by + y;
for (int x = 0; x < 4; x++) {
int i = x + 4 * yy;
if (inMask & (1 << i)) {
const etc1_byte* p = pIn + i * 3;
r += *(p++);
g += *(p++);
b += *(p++);
}
}
}
} else {
int bx = 0;
if (second) {
bx = 2;
}
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 2; x++) {
int xx = bx + x;
int i = xx + 4 * y;
if (inMask & (1 << i)) {
const etc1_byte* p = pIn + i * 3;
r += *(p++);
g += *(p++);
b += *(p++);
}
}
}
}
pColors[0] = (etc1_byte)((r + 4) >> 3);
pColors[1] = (etc1_byte)((g + 4) >> 3);
pColors[2] = (etc1_byte)((b + 4) >> 3);
}
static
inline int square(int x) {
return x * x;
}
static etc1_uint32 chooseModifier(const etc1_byte* pBaseColors,
const etc1_byte* pIn, etc1_uint32 *pLow, int bitIndex,
const int* pModifierTable) {
etc1_uint32 bestScore = ~0;
int bestIndex = 0;
int pixelR = pIn[0];
int pixelG = pIn[1];
int pixelB = pIn[2];
int r = pBaseColors[0];
int g = pBaseColors[1];
int b = pBaseColors[2];
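// Score each of the four modifiers with perceptual weights (G=6, R=3, B=1);
// the green term is evaluated first so most candidates can be rejected early.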
for (int i = 0; i < 4; i++) {
int modifier = pModifierTable[i];
int decodedG = clamp(g + modifier);
etc1_uint32 score = (etc1_uint32) (6 * square(decodedG - pixelG));
if (score >= bestScore) {
continue;
}
int decodedR = clamp(r + modifier);
score += (etc1_uint32) (3 * square(decodedR - pixelR));
if (score >= bestScore) {
continue;
}
int decodedB = clamp(b + modifier);
score += (etc1_uint32) square(decodedB - pixelB);
if (score < bestScore) {
bestScore = score;
bestIndex = i;
}
}
etc1_uint32 lowMask = (((bestIndex >> 1) << 16) | (bestIndex & 1))
<< bitIndex;
*pLow |= lowMask;
return bestScore;
}
static
void etc_encode_subblock_helper(const etc1_byte* pIn, etc1_uint32 inMask,
etc_compressed* pCompressed, bool flipped, bool second,
const etc1_byte* pBaseColors, const int* pModifierTable) {
int score = pCompressed->score;
if (flipped) {
int by = 0;
if (second) {
by = 2;
}
for (int y = 0; y < 2; y++) {
int yy = by + y;
for (int x = 0; x < 4; x++) {
int i = x + 4 * yy;
if (inMask & (1 << i)) {
score += chooseModifier(pBaseColors, pIn + i * 3,
&pCompressed->low, yy + x * 4, pModifierTable);
}
}
}
} else {
int bx = 0;
if (second) {
bx = 2;
}
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 2; x++) {
int xx = bx + x;
int i = xx + 4 * y;
if (inMask & (1 << i)) {
score += chooseModifier(pBaseColors, pIn + i * 3,
&pCompressed->low, y + xx * 4, pModifierTable);
}
}
}
}
pCompressed->score = score;
}
static bool inRange4bitSigned(int color) {
return color >= -4 && color <= 3;
}
static void etc_encodeBaseColors(etc1_byte* pBaseColors,
const etc1_byte* pColors, etc_compressed* pCompressed) {
int r1, g1, b1, r2, g2, b2; // 8 bit base colors for sub-blocks
bool differential;
{
int r51 = convert8To5(pColors[0]);
int g51 = convert8To5(pColors[1]);
int b51 = convert8To5(pColors[2]);
int r52 = convert8To5(pColors[3]);
int g52 = convert8To5(pColors[4]);
int b52 = convert8To5(pColors[5]);
r1 = convert5To8(r51);
g1 = convert5To8(g51);
b1 = convert5To8(b51);
int dr = r52 - r51;
int dg = g52 - g51;
int db = b52 - b51;
differential = inRange4bitSigned(dr) && inRange4bitSigned(dg)
&& inRange4bitSigned(db);
if (differential) {
r2 = convert5To8(r51 + dr);
g2 = convert5To8(g51 + dg);
b2 = convert5To8(b51 + db);
pCompressed->high |= (r51 << 27) | ((7 & dr) << 24) | (g51 << 19)
| ((7 & dg) << 16) | (b51 << 11) | ((7 & db) << 8) | 2;
}
}
if (!differential) {
int r41 = convert8To4(pColors[0]);
int g41 = convert8To4(pColors[1]);
int b41 = convert8To4(pColors[2]);
int r42 = convert8To4(pColors[3]);
int g42 = convert8To4(pColors[4]);
int b42 = convert8To4(pColors[5]);
r1 = convert4To8(r41);
g1 = convert4To8(g41);
b1 = convert4To8(b41);
r2 = convert4To8(r42);
g2 = convert4To8(g42);
b2 = convert4To8(b42);
pCompressed->high |= (r41 << 28) | (r42 << 24) | (g41 << 20) | (g42
<< 16) | (b41 << 12) | (b42 << 8);
}
pBaseColors[0] = r1;
pBaseColors[1] = g1;
pBaseColors[2] = b1;
pBaseColors[3] = r2;
pBaseColors[4] = g2;
pBaseColors[5] = b2;
}
static
void etc_encode_block_helper(const etc1_byte* pIn, etc1_uint32 inMask,
const etc1_byte* pColors, etc_compressed* pCompressed, bool flipped) {
pCompressed->score = ~0;
pCompressed->high = (flipped ? 1 : 0);
pCompressed->low = 0;
etc1_byte pBaseColors[6];
etc_encodeBaseColors(pBaseColors, pColors, pCompressed);
int originalHigh = pCompressed->high;
const int* pModifierTable = kModifierTable;
for (int i = 0; i < 8; i++, pModifierTable += 4) {
etc_compressed temp;
temp.score = 0;
temp.high = originalHigh | (i << 5);
temp.low = 0;
etc_encode_subblock_helper(pIn, inMask, &temp, flipped, false,
pBaseColors, pModifierTable);
take_best(pCompressed, &temp);
}
pModifierTable = kModifierTable;
etc_compressed firstHalf = *pCompressed;
for (int i = 0; i < 8; i++, pModifierTable += 4) {
etc_compressed temp;
temp.score = firstHalf.score;
temp.high = firstHalf.high | (i << 2);
temp.low = firstHalf.low;
etc_encode_subblock_helper(pIn, inMask, &temp, flipped, true,
pBaseColors + 3, pModifierTable);
if (i == 0) {
*pCompressed = temp;
} else {
take_best(pCompressed, &temp);
}
}
}
static void writeBigEndian(etc1_byte* pOut, etc1_uint32 d) {
pOut[0] = (etc1_byte)(d >> 24);
pOut[1] = (etc1_byte)(d >> 16);
pOut[2] = (etc1_byte)(d >> 8);
pOut[3] = (etc1_byte) d;
}
// Input is a 4 x 4 square of 3-byte pixels in form R, G, B
// inMask is a 16-bit mask where bit (1 << (x + y * 4)) tells whether the corresponding (x,y)
// pixel is valid or not. Invalid pixel color values are ignored when compressing.
// Output is an ETC1 compressed version of the data.
void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 inMask,
etc1_byte* pOut) {
etc1_byte colors[6];
etc1_byte flippedColors[6];
etc_average_colors_subblock(pIn, inMask, colors, false, false);
etc_average_colors_subblock(pIn, inMask, colors + 3, false, true);
etc_average_colors_subblock(pIn, inMask, flippedColors, true, false);
etc_average_colors_subblock(pIn, inMask, flippedColors + 3, true, true);
etc_compressed a, b;
etc_encode_block_helper(pIn, inMask, colors, &a, false);
etc_encode_block_helper(pIn, inMask, flippedColors, &b, true);
take_best(&a, &b);
writeBigEndian(pOut, a.high);
writeBigEndian(pOut + 4, a.low);
}
// Return the size of the encoded image data (does not include size of PKM header).
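// e.g. a 17x9 image is padded to 20x12 blocks, giving (20 * 12) / 2 = 120 bytes.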
etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height) {
return (((width + 3) & ~3) * ((height + 3) & ~3)) >> 1;
}
// Encode an entire image.
// pIn - pointer to the image data. Formatted such that the Red component of
// pixel (x,y) is at pIn + pixelSize * x + stride * y;
// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut) {
if (pixelSize < 2 || pixelSize > 4) {
return -1;
}
static const unsigned short kYMask[] = { 0x0, 0xf, 0xff, 0xfff, 0xffff };
static const unsigned short kXMask[] = { 0x0, 0x1111, 0x3333, 0x7777,
0xffff };
etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
etc1_byte encoded[ETC1_ENCODED_BLOCK_SIZE];
etc1_uint32 encodedWidth = (width + 3) & ~3;
etc1_uint32 encodedHeight = (height + 3) & ~3;
for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
etc1_uint32 yEnd = height - y;
if (yEnd > 4) {
yEnd = 4;
}
int ymask = kYMask[yEnd];
for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
etc1_uint32 xEnd = width - x;
if (xEnd > 4) {
xEnd = 4;
}
int mask = ymask & kXMask[xEnd];
for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
etc1_byte* q = block + (cy * 4) * 3;
const etc1_byte* p = pIn + pixelSize * x + stride * (y + cy);
if (pixelSize >= 3) {
for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
memcpy(q, p, 3);
q += 3;
p += pixelSize;
}
} else {
for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
int pixel = (p[1] << 8) | p[0];
*q++ = convert5To8(pixel >> 11);
*q++ = convert6To8(pixel >> 5);
*q++ = convert5To8(pixel);
p += pixelSize;
}
}
}
etc1_encode_block(block, mask, encoded);
memcpy(pOut, encoded, sizeof(encoded));
pOut += sizeof(encoded);
}
}
return 0;
}
// Decode an entire image.
// pIn - pointer to encoded data.
// pOut - pointer to the image data. Will be written such that the Red component of
// pixel (x,y) is at pOut + pixelSize * x + stride * y. Must be
// large enough to store entire image.
int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
etc1_uint32 width, etc1_uint32 height,
etc1_uint32 pixelSize, etc1_uint32 stride) {
if (pixelSize < 2 || pixelSize > 4) {
return -1;
}
etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
etc1_uint32 encodedWidth = (width + 3) & ~3;
etc1_uint32 encodedHeight = (height + 3) & ~3;
for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
etc1_uint32 yEnd = height - y;
if (yEnd > 4) {
yEnd = 4;
}
for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
etc1_uint32 xEnd = width - x;
if (xEnd > 4) {
xEnd = 4;
}
etc1_decode_block(pIn, block);
pIn += ETC1_ENCODED_BLOCK_SIZE;
for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
const etc1_byte* q = block + (cy * 4) * 3;
etc1_byte* p = pOut + pixelSize * x + stride * (y + cy);
if (pixelSize >= 3) {
for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
memcpy(p, q, 3);
q += 3;
p += pixelSize;
}
} else {
for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
etc1_byte r = *q++;
etc1_byte g = *q++;
etc1_byte b = *q++;
etc1_uint32 pixel = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
*p++ = (etc1_byte) pixel;
*p++ = (etc1_byte) (pixel >> 8);
}
}
}
}
}
return 0;
}
static const char kMagic[] = { 'P', 'K', 'M', ' ', '1', '0' };
static const etc1_uint32 ETC1_PKM_FORMAT_OFFSET = 6;
static const etc1_uint32 ETC1_PKM_ENCODED_WIDTH_OFFSET = 8;
static const etc1_uint32 ETC1_PKM_ENCODED_HEIGHT_OFFSET = 10;
static const etc1_uint32 ETC1_PKM_WIDTH_OFFSET = 12;
static const etc1_uint32 ETC1_PKM_HEIGHT_OFFSET = 14;
static const etc1_uint32 ETC1_RGB_NO_MIPMAPS = 0;
static void writeBEUint16(etc1_byte* pOut, etc1_uint32 data) {
pOut[0] = (etc1_byte) (data >> 8);
pOut[1] = (etc1_byte) data;
}
static etc1_uint32 readBEUint16(const etc1_byte* pIn) {
return (pIn[0] << 8) | pIn[1];
}
// Format a PKM header
void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height) {
memcpy(pHeader, kMagic, sizeof(kMagic));
etc1_uint32 encodedWidth = (width + 3) & ~3;
etc1_uint32 encodedHeight = (height + 3) & ~3;
writeBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET, ETC1_RGB_NO_MIPMAPS);
writeBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET, encodedWidth);
writeBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET, encodedHeight);
writeBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET, width);
writeBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET, height);
}
// Check if a PKM header is correctly formatted.
etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader) {
if (memcmp(pHeader, kMagic, sizeof(kMagic))) {
return false;
}
etc1_uint32 format = readBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET);
etc1_uint32 encodedWidth = readBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET);
etc1_uint32 encodedHeight = readBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET);
etc1_uint32 width = readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
etc1_uint32 height = readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
return format == ETC1_RGB_NO_MIPMAPS &&
encodedWidth >= width && encodedWidth - width < 4 &&
encodedHeight >= height && encodedHeight - height < 4;
}
// Read the image width from a PKM header
etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader) {
return readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
}
// Read the image height from a PKM header
etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader){
return readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
}

114
3rdparty/etc1/etc1.h vendored Normal file

@@ -0,0 +1,114 @@
// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////
// This is a fork of the AOSP project ETC1 codec. The original code can be found
// at the following web site:
// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/libs/ETC1/
//////////////////////////////////////////////////////////////////////////////////////////
#ifndef __etc1_h__
#define __etc1_h__
#define ETC1_ENCODED_BLOCK_SIZE 8
#define ETC1_DECODED_BLOCK_SIZE 48
#ifndef ETC1_RGB8_OES
#define ETC1_RGB8_OES 0x8D64
#endif
typedef unsigned char etc1_byte;
typedef int etc1_bool;
typedef unsigned int etc1_uint32;
#ifdef __cplusplus
extern "C" {
#endif
// Encode a block of pixels.
//
// pIn is a pointer to an ETC1_DECODED_BLOCK_SIZE array of bytes that represent a
// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y)) is the R
// value of pixel (x, y).
//
// validPixelMask is a 16-bit mask where bit (1 << (x + y * 4)) indicates whether
// the corresponding (x,y) pixel is valid. Invalid pixel color values are ignored when compressing.
//
// pOut is an ETC1 compressed version of the data.
void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 validPixelMask, etc1_byte* pOut);
// Decode a block of pixels.
//
// pIn is an ETC1 compressed version of the data.
//
// pOut is a pointer to an ETC1_DECODED_BLOCK_SIZE array of bytes that represent a
// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y)) is the R
// value of pixel (x, y).
void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut);
// Return the size of the encoded image data (does not include size of PKM header).
etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height);
// Encode an entire image.
// pIn - pointer to the image data. Formatted such that
// pixel (x,y) is at pIn + pixelSize * x + stride * y;
// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
// pixelSize can be 2 or 3. 2 is a GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image.
// returns non-zero if there is an error.
int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut);
// Decode an entire image.
// pIn - pointer to encoded data.
// pOut - pointer to the image data. Will be written such that
// pixel (x,y) is at pOut + pixelSize * x + stride * y. Must be
// large enough to store entire image.
// pixelSize can be 2 or 3. 2 is a GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image.
// returns non-zero if there is an error.
int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
etc1_uint32 width, etc1_uint32 height,
etc1_uint32 pixelSize, etc1_uint32 stride);
// Size of a PKM header, in bytes.
#define ETC_PKM_HEADER_SIZE 16
// Format a PKM header
void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height);
// Check if a PKM header is correctly formatted.
etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader);
// Read the image width from a PKM header
etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader);
// Read the image height from a PKM header
etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader);
#ifdef __cplusplus
}
#endif
#endif

24
3rdparty/etc2/LICENSE.txt vendored Normal file

@@ -0,0 +1,24 @@
Copyright (c) 2013, Bartosz Taudul <wolf.pld@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the <organization> nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

90
3rdparty/etc2/Math.hpp vendored Normal file

@@ -0,0 +1,90 @@
#ifndef __DARKRL__MATH_HPP__
#define __DARKRL__MATH_HPP__
#include <algorithm>
#include <math.h>
#include "Types.hpp"
template<typename T>
inline T AlignPOT( T val )
{
if( val == 0 ) return 1;
val--;
for( unsigned int i=1; i<sizeof( T ) * 8; i <<= 1 )
{
val |= val >> i;
}
return val + 1;
}
inline int CountSetBits( uint32 val )
{
val -= ( val >> 1 ) & 0x55555555;
val = ( ( val >> 2 ) & 0x33333333 ) + ( val & 0x33333333 );
val = ( ( val >> 4 ) + val ) & 0x0f0f0f0f;
val += val >> 8;
val += val >> 16;
return val & 0x0000003f;
}
inline int CountLeadingZeros( uint32 val )
{
val |= val >> 1;
val |= val >> 2;
val |= val >> 4;
val |= val >> 8;
val |= val >> 16;
return 32 - CountSetBits( val );
}
inline float sRGB2linear( float v )
{
const float a = 0.055f;
if( v <= 0.04045f )
{
return v / 12.92f;
}
else
{
return powf( ( v + a ) / ( 1 + a ), 2.4f );
}
}
inline float linear2sRGB( float v )
{
const float a = 0.055f;
if( v <= 0.0031308f )
{
return 12.92f * v;
}
else
{
return ( 1 + a ) * pow( v, 1/2.4f ) - a;
}
}
template<class T>
inline T SmoothStep( T x )
{
return x*x*(3-2*x);
}
inline uint8 clampu8( int32 val )
{
return std::min( std::max( 0, val ), 255 );
}
template<class T>
inline T sq( T val )
{
return val * val;
}
static inline int mul8bit( int a, int b )
{
int t = a*b + 128;
return ( t + ( t >> 8 ) ) >> 8;
}
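// Illustrative expected values for the helpers above:
//   AlignPOT( 17u )         == 32  (next power of two)
//   CountSetBits( 0xF0u )   == 4
//   CountLeadingZeros( 1u ) == 31
//   clampu8( 300 )          == 255
//   mul8bit( a, b )         ~= a * b / 255, with rounding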
#endif

51
3rdparty/etc2/ProcessCommon.hpp vendored Normal file

@@ -0,0 +1,51 @@
#ifndef __PROCESSCOMMON_HPP__
#define __PROCESSCOMMON_HPP__
#include <assert.h>
#include <stddef.h>
#include "Types.hpp"
template<class T>
static size_t GetLeastError( const T* err, size_t num )
{
size_t idx = 0;
for( size_t i=1; i<num; i++ )
{
if( err[i] < err[idx] )
{
idx = i;
}
}
return idx;
}
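// Reverses the byte order within the high 32 bits of d (the selector words
// filled in by EncodeSelectors below); the low 32 bits pass through unchanged.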
static uint64 FixByteOrder( uint64 d )
{
return ( ( d & 0x00000000FFFFFFFF ) ) |
( ( d & 0xFF00000000000000 ) >> 24 ) |
( ( d & 0x000000FF00000000 ) << 24 ) |
( ( d & 0x00FF000000000000 ) >> 8 ) |
( ( d & 0x0000FF0000000000 ) << 8 );
}
template<class T, class S>
static uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id )
{
size_t tidx[2];
tidx[0] = GetLeastError( terr[0], 8 );
tidx[1] = GetLeastError( terr[1], 8 );
d |= tidx[0] << 26;
d |= tidx[1] << 29;
for( int i=0; i<16; i++ )
{
uint64 t = tsel[i][tidx[id[i]%2]];
d |= ( t & 0x1 ) << ( i + 32 );
d |= ( t & 0x2 ) << ( i + 47 );
}
return d;
}
#endif

719
3rdparty/etc2/ProcessRGB.cpp vendored Normal file

@@ -0,0 +1,719 @@
#include <string.h>
#include "Math.hpp"
#include "ProcessCommon.hpp"
#include "ProcessRGB.hpp"
#include "Tables.hpp"
#include "Types.hpp"
#include "Vector.hpp"
#include <bx/endian.h>
#ifdef __SSE4_1__
# ifdef _MSC_VER
# include <intrin.h>
# include <Windows.h>
# else
# include <x86intrin.h>
# endif
#endif
namespace
{
typedef uint16 v4i[4];
void Average( const uint8* data, v4i* a )
{
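// Accumulates the four 2x2 quadrants of the 4x4 block, then averages the
// bottom, top, right and left half-blocks (8 pixels each; +4 rounds, >>3
// divides by 8).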
#ifdef __SSE4_1__
__m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
__m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
__m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
__m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
__m128i d0l = _mm_unpacklo_epi8(d0, _mm_setzero_si128());
__m128i d0h = _mm_unpackhi_epi8(d0, _mm_setzero_si128());
__m128i d1l = _mm_unpacklo_epi8(d1, _mm_setzero_si128());
__m128i d1h = _mm_unpackhi_epi8(d1, _mm_setzero_si128());
__m128i d2l = _mm_unpacklo_epi8(d2, _mm_setzero_si128());
__m128i d2h = _mm_unpackhi_epi8(d2, _mm_setzero_si128());
__m128i d3l = _mm_unpacklo_epi8(d3, _mm_setzero_si128());
__m128i d3h = _mm_unpackhi_epi8(d3, _mm_setzero_si128());
__m128i sum0 = _mm_add_epi16(d0l, d1l);
__m128i sum1 = _mm_add_epi16(d0h, d1h);
__m128i sum2 = _mm_add_epi16(d2l, d3l);
__m128i sum3 = _mm_add_epi16(d2h, d3h);
__m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
__m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
__m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
__m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
__m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
__m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
__m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
__m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
__m128i b0 = _mm_add_epi32(sum0l, sum0h);
__m128i b1 = _mm_add_epi32(sum1l, sum1h);
__m128i b2 = _mm_add_epi32(sum2l, sum2h);
__m128i b3 = _mm_add_epi32(sum3l, sum3h);
__m128i a0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b2, b3), _mm_set1_epi32(4)), 3);
__m128i a1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b1), _mm_set1_epi32(4)), 3);
__m128i a2 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b1, b3), _mm_set1_epi32(4)), 3);
__m128i a3 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b2), _mm_set1_epi32(4)), 3);
_mm_storeu_si128((__m128i*)&a[0], _mm_packus_epi32(_mm_shuffle_epi32(a0, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a1, _MM_SHUFFLE(3, 0, 1, 2))));
_mm_storeu_si128((__m128i*)&a[2], _mm_packus_epi32(_mm_shuffle_epi32(a2, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a3, _MM_SHUFFLE(3, 0, 1, 2))));
#else
uint32 r[4];
uint32 g[4];
uint32 b[4];
memset(r, 0, sizeof(r));
memset(g, 0, sizeof(g));
memset(b, 0, sizeof(b));
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
int index = (j & 2) + (i >> 1);
b[index] += *data++;
g[index] += *data++;
r[index] += *data++;
data++;
}
}
a[0][0] = uint16( (r[2] + r[3] + 4) / 8 );
a[0][1] = uint16( (g[2] + g[3] + 4) / 8 );
a[0][2] = uint16( (b[2] + b[3] + 4) / 8 );
a[0][3] = 0;
a[1][0] = uint16( (r[0] + r[1] + 4) / 8 );
a[1][1] = uint16( (g[0] + g[1] + 4) / 8 );
a[1][2] = uint16( (b[0] + b[1] + 4) / 8 );
a[1][3] = 0;
a[2][0] = uint16( (r[1] + r[3] + 4) / 8 );
a[2][1] = uint16( (g[1] + g[3] + 4) / 8 );
a[2][2] = uint16( (b[1] + b[3] + 4) / 8 );
a[2][3] = 0;
a[3][0] = uint16( (r[0] + r[2] + 4) / 8 );
a[3][1] = uint16( (g[0] + g[2] + 4) / 8 );
a[3][2] = uint16( (b[0] + b[2] + 4) / 8 );
a[3][3] = 0;
#endif
}
void CalcErrorBlock( const uint8* data, uint err[4][4] )
{
#ifdef __SSE4_1__
__m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
__m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
__m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
__m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
__m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
__m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
__m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
__m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
__m128i d0l = _mm_unpacklo_epi8(dm0, _mm_setzero_si128());
__m128i d0h = _mm_unpackhi_epi8(dm0, _mm_setzero_si128());
__m128i d1l = _mm_unpacklo_epi8(dm1, _mm_setzero_si128());
__m128i d1h = _mm_unpackhi_epi8(dm1, _mm_setzero_si128());
__m128i d2l = _mm_unpacklo_epi8(dm2, _mm_setzero_si128());
__m128i d2h = _mm_unpackhi_epi8(dm2, _mm_setzero_si128());
__m128i d3l = _mm_unpacklo_epi8(dm3, _mm_setzero_si128());
__m128i d3h = _mm_unpackhi_epi8(dm3, _mm_setzero_si128());
__m128i sum0 = _mm_add_epi16(d0l, d1l);
__m128i sum1 = _mm_add_epi16(d0h, d1h);
__m128i sum2 = _mm_add_epi16(d2l, d3l);
__m128i sum3 = _mm_add_epi16(d2h, d3h);
__m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
__m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
__m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
__m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
__m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
__m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
__m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
__m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
__m128i b0 = _mm_add_epi32(sum0l, sum0h);
__m128i b1 = _mm_add_epi32(sum1l, sum1h);
__m128i b2 = _mm_add_epi32(sum2l, sum2h);
__m128i b3 = _mm_add_epi32(sum3l, sum3h);
__m128i a0 = _mm_add_epi32(b2, b3);
__m128i a1 = _mm_add_epi32(b0, b1);
__m128i a2 = _mm_add_epi32(b1, b3);
__m128i a3 = _mm_add_epi32(b0, b2);
_mm_storeu_si128((__m128i*)&err[0], a0);
_mm_storeu_si128((__m128i*)&err[1], a1);
_mm_storeu_si128((__m128i*)&err[2], a2);
_mm_storeu_si128((__m128i*)&err[3], a3);
#else
uint terr[4][4];
memset(terr, 0, 16 * sizeof(uint));
for( int j=0; j<4; j++ )
{
for( int i=0; i<4; i++ )
{
int index = (j & 2) + (i >> 1);
uint d = *data++;
terr[index][0] += d;
d = *data++;
terr[index][1] += d;
d = *data++;
terr[index][2] += d;
data++;
}
}
for( int i=0; i<3; i++ )
{
err[0][i] = terr[2][i] + terr[3][i];
err[1][i] = terr[0][i] + terr[1][i];
err[2][i] = terr[1][i] + terr[3][i];
err[3][i] = terr[0][i] + terr[2][i];
}
for( int i=0; i<4; i++ )
{
err[i][3] = 0;
}
#endif
}
uint CalcError( const uint block[4], const v4i& average )
{
uint err = 0x3FFFFFFF; // Big value to prevent negative values, but small enough to prevent overflow
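// Note (annotation): up to terms that are identical for every candidate average
// (the summed squared pixel values, folded into the 0x3FFFFFFF offset), this
// accumulates sum((pixel - average)^2) over the 8 pixels and 3 channels: each
// block[] entry is a per-channel sum over 8 pixels, giving the
// -2*avg*sum(p) + 8*avg^2 terms below.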
err -= block[0] * 2 * average[2];
err -= block[1] * 2 * average[1];
err -= block[2] * 2 * average[0];
err += 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
return err;
}
void ProcessAverages( v4i* a )
{
#ifdef __SSE4_1__
for( int i=0; i<2; i++ )
{
__m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
__m128i t = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(31)), _mm_set1_epi16(128));
__m128i c = _mm_srli_epi16(_mm_add_epi16(t, _mm_srli_epi16(t, 8)), 8);
__m128i c1 = _mm_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
__m128i diff = _mm_sub_epi16(c, c1);
diff = _mm_max_epi16(diff, _mm_set1_epi16(-4));
diff = _mm_min_epi16(diff, _mm_set1_epi16(3));
__m128i co = _mm_add_epi16(c1, diff);
c = _mm_blend_epi16(co, c, 0xF0);
__m128i a0 = _mm_or_si128(_mm_slli_epi16(c, 3), _mm_srli_epi16(c, 2));
_mm_storeu_si128((__m128i*)a[4+i*2], a0);
}
for( int i=0; i<2; i++ )
{
__m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
__m128i t0 = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(15)), _mm_set1_epi16(128));
__m128i t1 = _mm_srli_epi16(_mm_add_epi16(t0, _mm_srli_epi16(t0, 8)), 8);
__m128i t2 = _mm_or_si128(t1, _mm_slli_epi16(t1, 4));
_mm_storeu_si128((__m128i*)a[i*2], t2);
}
#else
for( int i=0; i<2; i++ )
{
for( int j=0; j<3; j++ )
{
int32 c1 = mul8bit( a[i*2+1][j], 31 );
int32 c2 = mul8bit( a[i*2][j], 31 );
int32 diff = c2 - c1;
if( diff > 3 ) diff = 3;
else if( diff < -4 ) diff = -4;
int32 co = c1 + diff;
a[5+i*2][j] = ( c1 << 3 ) | ( c1 >> 2 );
a[4+i*2][j] = ( co << 3 ) | ( co >> 2 );
}
}
for( int i=0; i<4; i++ )
{
a[i][0] = g_avg2[mul8bit( a[i][0], 15 )];
a[i][1] = g_avg2[mul8bit( a[i][1], 15 )];
a[i][2] = g_avg2[mul8bit( a[i][2], 15 )];
}
#endif
}
void EncodeAverages( uint64& _d, const v4i* a, size_t idx )
{
uint64 d = _d;
d |= ( idx << 24 );
size_t base = idx << 1;
if( ( idx & 0x2 ) == 0 )
{
for( int i=0; i<3; i++ )
{
d |= uint64( a[base+0][i] >> 4 ) << ( i*8 );
d |= uint64( a[base+1][i] >> 4 ) << ( i*8 + 4 );
}
}
else
{
for( int i=0; i<3; i++ )
{
d |= uint64( a[base+1][i] & 0xF8 ) << ( i*8 );
int32 c = ( ( a[base+0][i] & 0xF8 ) - ( a[base+1][i] & 0xF8 ) ) >> 3;
c &= ~0xFFFFFFF8;
d |= ((uint64)c) << ( i*8 );
}
}
_d = d;
}
uint64 CheckSolid( const uint8* src )
{
#ifdef __SSE4_1__
__m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
__m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
__m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
__m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
__m128i c = _mm_shuffle_epi32(d0, _MM_SHUFFLE(0, 0, 0, 0));
__m128i c0 = _mm_cmpeq_epi8(d0, c);
__m128i c1 = _mm_cmpeq_epi8(d1, c);
__m128i c2 = _mm_cmpeq_epi8(d2, c);
__m128i c3 = _mm_cmpeq_epi8(d3, c);
__m128i m0 = _mm_and_si128(c0, c1);
__m128i m1 = _mm_and_si128(c2, c3);
__m128i m = _mm_and_si128(m0, m1);
if (!_mm_testc_si128(m, _mm_set1_epi32(-1)))
{
return 0;
}
#else
const uint8* ptr = src + 4;
for( int i=1; i<16; i++ )
{
if( memcmp( src, ptr, 4 ) != 0 )
{
return 0;
}
ptr += 4;
}
#endif
return 0x02000000 |
( uint( src[0] & 0xF8 ) << 16 ) |
( uint( src[1] & 0xF8 ) << 8 ) |
( uint( src[2] & 0xF8 ) );
}
void PrepareAverages( v4i a[8], const uint8* src, uint err[4] )
{
Average( src, a );
ProcessAverages( a );
uint errblock[4][4];
CalcErrorBlock( src, errblock );
for( int i=0; i<4; i++ )
{
err[i/2] += CalcError( errblock[i], a[i] );
err[2+i/2] += CalcError( errblock[i], a[i+4] );
}
}
void FindBestFit( uint64 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
{
for( size_t i=0; i<16; i++ )
{
uint16* sel = tsel[i];
uint bid = id[i];
uint64* ter = terr[bid%2];
uint8 b = *data++;
uint8 g = *data++;
uint8 r = *data++;
data++;
int dr = a[bid][0] - r;
int dg = a[bid][1] - g;
int db = a[bid][2] - b;
#ifdef __SSE4_1__
// Reference implementation
__m128i pix = _mm_set1_epi32(dr * 77 + dg * 151 + db * 28);
// Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
__m128i error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[0]));
__m128i error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[1]));
__m128i error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[0]));
__m128i error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[1]));
__m128i index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
__m128i minError0 = _mm_min_epi32(error0, error1);
__m128i index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
__m128i minError1 = _mm_min_epi32(error2, error3);
__m128i minIndex0 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
__m128i minError = _mm_min_epi32(minError0, minError1);
// Squaring the minimum error to produce correct values when adding
__m128i minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
__m128i squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
_mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
__m128i minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
__m128i squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
_mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
// Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[2]));
error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[3]));
error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[2]));
error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[3]));
index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
minError0 = _mm_min_epi32(error0, error1);
index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
minError1 = _mm_min_epi32(error2, error3);
__m128i minIndex1 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
minError = _mm_min_epi32(minError0, minError1);
// Squaring the minimum error to produce correct values when adding
minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 2));
_mm_storeu_si128(((__m128i*)ter) + 2, squareErrorLow);
minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 3));
_mm_storeu_si128(((__m128i*)ter) + 3, squareErrorHigh);
__m128i minIndex = _mm_packs_epi32(minIndex0, minIndex1);
_mm_storeu_si128((__m128i*)sel, minIndex);
#else
int pix = dr * 77 + dg * 151 + db * 28;
for( int t=0; t<8; t++ )
{
const int64* tab = g_table256[t];
uint idx = 0;
uint64 err = sq( tab[0] + pix );
for( int j=1; j<4; j++ )
{
uint64 local = sq( tab[j] + pix );
if( local < err )
{
err = local;
idx = j;
}
}
*sel++ = idx;
*ter++ += err;
}
#endif
}
}
#ifdef __SSE4_1__
// Non-reference implementation, but faster. Produces the same results as the AVX2 version
void FindBestFit( uint32 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
{
for( size_t i=0; i<16; i++ )
{
uint16* sel = tsel[i];
uint bid = id[i];
uint32* ter = terr[bid%2];
uint8 b = *data++;
uint8 g = *data++;
uint8 r = *data++;
data++;
int dr = a[bid][0] - r;
int dg = a[bid][1] - g;
int db = a[bid][2] - b;
// The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
// This produces slightly different results, but is significantly faster
__m128i pixel = _mm_set1_epi16(dr * 38 + dg * 76 + db * 14);
__m128i pix = _mm_abs_epi16(pixel);
// Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
// Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
__m128i error0 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[0]));
__m128i error1 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[1]));
__m128i index = _mm_and_si128(_mm_cmplt_epi16(error1, error0), _mm_set1_epi16(1));
__m128i minError = _mm_min_epi16(error0, error1);
// Exploit the symmetry of the selector table and use the sign bit
// This produces slightly different results, but is needed to produce the same results as the AVX2 implementation
__m128i indexBit = _mm_andnot_si128(_mm_srli_epi16(pixel, 15), _mm_set1_epi8(-1));
__m128i minIndex = _mm_or_si128(index, _mm_add_epi16(indexBit, indexBit));
// Squaring the minimum error to produce correct values when adding
__m128i squareErrorLo = _mm_mullo_epi16(minError, minError);
__m128i squareErrorHi = _mm_mulhi_epi16(minError, minError);
__m128i squareErrorLow = _mm_unpacklo_epi16(squareErrorLo, squareErrorHi);
__m128i squareErrorHigh = _mm_unpackhi_epi16(squareErrorLo, squareErrorHi);
squareErrorLow = _mm_add_epi32(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
_mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
squareErrorHigh = _mm_add_epi32(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
_mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
_mm_storeu_si128((__m128i*)sel, minIndex);
}
}
#endif
uint8_t convert6(float f)
{
int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
return (i + 11 - ((i + 11) >> 7) - ((i + 4) >> 7)) >> 3;
}
uint8_t convert7(float f)
{
int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
return (i + 9 - ((i + 9) >> 8) - ((i + 6) >> 8)) >> 2;
}
std::pair<uint64, uint64> Planar(const uint8* src)
{
int32 r = 0;
int32 g = 0;
int32 b = 0;
for (int i = 0; i < 16; ++i)
{
b += src[i * 4 + 0];
g += src[i * 4 + 1];
r += src[i * 4 + 2];
}
int32 difRyz = 0;
int32 difGyz = 0;
int32 difByz = 0;
int32 difRxz = 0;
int32 difGxz = 0;
int32 difBxz = 0;
const int32 scaling[] = { -255, -85, 85, 255 };
for (int i = 0; i < 16; ++i)
{
int32 difB = (static_cast<int>(src[i * 4 + 0]) << 4) - b;
int32 difG = (static_cast<int>(src[i * 4 + 1]) << 4) - g;
int32 difR = (static_cast<int>(src[i * 4 + 2]) << 4) - r;
difRyz += difR * scaling[i % 4];
difGyz += difG * scaling[i % 4];
difByz += difB * scaling[i % 4];
difRxz += difR * scaling[i / 4];
difGxz += difG * scaling[i / 4];
difBxz += difB * scaling[i / 4];
}
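// Note (annotation): scaling[] holds the centered coordinates of the 4x4 grid,
// so the dif*yz and dif*xz sums are per-channel covariances of color with the
// two block axes; combined with the channel sums r/g/b they amount to a
// least-squares plane fit RGB(x, y) = a*x + b*y + d over the block.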
const float scale = -4.0f / ((255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f);
float aR = difRxz * scale;
float aG = difGxz * scale;
float aB = difBxz * scale;
float bR = difRyz * scale;
float bG = difGyz * scale;
float bB = difByz * scale;
float dR = r * (4.0f / 16.0f);
float dG = g * (4.0f / 16.0f);
float dB = b * (4.0f / 16.0f);
// Calculate the three colors RGBO (origin), RGBH (horizontal), and RGBV (vertical). RGB = df - af * x - bf * y;
float cofR = (aR * 255.0f + (bR * 255.0f + dR));
float cofG = (aG * 255.0f + (bG * 255.0f + dG));
float cofB = (aB * 255.0f + (bB * 255.0f + dB));
float chfR = (aR * -425.0f + (bR * 255.0f + dR));
float chfG = (aG * -425.0f + (bG * 255.0f + dG));
float chfB = (aB * -425.0f + (bB * 255.0f + dB));
float cvfR = (aR * 255.0f + (bR * -425.0f + dR));
float cvfG = (aG * 255.0f + (bG * -425.0f + dG));
float cvfB = (aB * 255.0f + (bB * -425.0f + dB));
// convert to r6g7b6
int32 coR = convert6(cofR);
int32 coG = convert7(cofG);
int32 coB = convert6(cofB);
int32 chR = convert6(chfR);
int32 chG = convert7(chfG);
int32 chB = convert6(chfB);
int32 cvR = convert6(cvfR);
int32 cvG = convert7(cvfG);
int32 cvB = convert6(cvfB);
// Error calculation
int32 ro0 = coR;
int32 go0 = coG;
int32 bo0 = coB;
int32 ro1 = (ro0 >> 4) | (ro0 << 2);
int32 go1 = (go0 >> 6) | (go0 << 1);
int32 bo1 = (bo0 >> 4) | (bo0 << 2);
int32 ro2 = (ro1 << 2) + 2;
int32 go2 = (go1 << 2) + 2;
int32 bo2 = (bo1 << 2) + 2;
int32 rh0 = chR;
int32 gh0 = chG;
int32 bh0 = chB;
int32 rh1 = (rh0 >> 4) | (rh0 << 2);
int32 gh1 = (gh0 >> 6) | (gh0 << 1);
int32 bh1 = (bh0 >> 4) | (bh0 << 2);
int32 rh2 = rh1 - ro1;
int32 gh2 = gh1 - go1;
int32 bh2 = bh1 - bo1;
int32 rv0 = cvR;
int32 gv0 = cvG;
int32 bv0 = cvB;
int32 rv1 = (rv0 >> 4) | (rv0 << 2);
int32 gv1 = (gv0 >> 6) | (gv0 << 1);
int32 bv1 = (bv0 >> 4) | (bv0 << 2);
int32 rv2 = rv1 - ro1;
int32 gv2 = gv1 - go1;
int32 bv2 = bv1 - bo1;
uint64 error = 0;
for (int i = 0; i < 16; ++i)
{
int32 cR = clampu8((rh2 * (i / 4) + rv2 * (i % 4) + ro2) >> 2);
int32 cG = clampu8((gh2 * (i / 4) + gv2 * (i % 4) + go2) >> 2);
int32 cB = clampu8((bh2 * (i / 4) + bv2 * (i % 4) + bo2) >> 2);
int32 difB = static_cast<int>(src[i * 4 + 0]) - cB;
int32 difG = static_cast<int>(src[i * 4 + 1]) - cG;
int32 difR = static_cast<int>(src[i * 4 + 2]) - cR;
int32 dif = difR * 38 + difG * 76 + difB * 14;
error += dif * dif;
}
/**/
uint32 rgbv = cvB | (cvG << 6) | (cvR << 13);
uint32 rgbh = chB | (chG << 6) | (chR << 13);
uint32 hi = rgbv | ((rgbh & 0x1FFF) << 19);
uint32 lo = (chR & 0x1) | 0x2 | ((chR << 1) & 0x7C);
lo |= ((coB & 0x07) << 7) | ((coB & 0x18) << 8) | ((coB & 0x20) << 11);
lo |= ((coG & 0x3F) << 17) | ((coG & 0x40) << 18);
lo |= coR << 25;
const int32 idx = (coR & 0x20) | ((coG & 0x20) >> 1) | ((coB & 0x1E) >> 1);
lo |= g_flags[idx];
uint64 result = static_cast<uint32>(bx::endianSwap(lo));
result |= static_cast<uint64>(static_cast<uint32>(bx::endianSwap(hi))) << 32;
return std::make_pair(result, error);
}
template<class T, class S>
uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id, const uint64 value, const uint64 error)
{
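// Note (annotation): for each half of the block pick the modifier-table index
// with the least accumulated error; if their combined error is no better than
// that of the candidate already encoded in 'value' (the planar block in
// ProcessRGB_ETC2), keep that candidate instead.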
size_t tidx[2];
tidx[0] = GetLeastError( terr[0], 8 );
tidx[1] = GetLeastError( terr[1], 8 );
if ((terr[0][tidx[0]] + terr[1][tidx[1]]) >= error)
{
return value;
}
d |= tidx[0] << 26;
d |= tidx[1] << 29;
for( int i=0; i<16; i++ )
{
uint64 t = tsel[i][tidx[id[i]%2]];
d |= ( t & 0x1 ) << ( i + 32 );
d |= ( t & 0x2 ) << ( i + 47 );
}
return FixByteOrder(d);
}
}
uint64 ProcessRGB( const uint8* src )
{
uint64 d = CheckSolid( src );
if( d != 0 ) return d;
v4i a[8];
uint err[4] = {};
PrepareAverages( a, src, err );
size_t idx = GetLeastError( err, 4 );
EncodeAverages( d, a, idx );
#if defined __SSE4_1__ && !defined REFERENCE_IMPLEMENTATION
uint32 terr[2][8] = {};
#else
uint64 terr[2][8] = {};
#endif
uint16 tsel[16][8];
const uint32* id = g_id[idx];
FindBestFit( terr, tsel, a, id, src );
return FixByteOrder( EncodeSelectors( d, terr, tsel, id ) );
}
uint64 ProcessRGB_ETC2( const uint8* src )
{
std::pair<uint64, uint64> result = Planar( src );
uint64 d = 0;
v4i a[8];
uint err[4] = {};
PrepareAverages( a, src, err );
size_t idx = GetLeastError( err, 4 );
EncodeAverages( d, a, idx );
uint64 terr[2][8] = {};
uint16 tsel[16][8];
const uint32* id = g_id[idx];
FindBestFit( terr, tsel, a, id, src );
return EncodeSelectors( d, terr, tsel, id, result.first, result.second );
}
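For orientation, a minimal caller sketch (illustrative only; FillBlockBGRA is a hypothetical helper): both entry points consume one 4x4 block of 8-bit BGRA pixels, 64 bytes row-major with the blue byte first and alpha ignored, exactly as read by CheckSolid and Average above, and return the packed 64-bit ETC block.
uint8 block[4*4*4];
FillBlockBGRA( block );                  // hypothetical: write 16 BGRA pixels
uint64 etc1 = ProcessRGB( block );       // ETC1 individual/differential modes
uint64 etc2 = ProcessRGB_ETC2( block );  // additionally tries the ETC2 planar mode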

9
3rdparty/etc2/ProcessRGB.hpp vendored Normal file
View File

@@ -0,0 +1,9 @@
#ifndef __PROCESSRGB_HPP__
#define __PROCESSRGB_HPP__
#include "Types.hpp"
uint64 ProcessRGB( const uint8* src );
uint64 ProcessRGB_ETC2( const uint8* src );
#endif

109
3rdparty/etc2/Tables.cpp vendored Normal file
View File

@@ -0,0 +1,109 @@
#include "Tables.hpp"
const int32 g_table[8][4] = {
{ 2, 8, -2, -8 },
{ 5, 17, -5, -17 },
{ 9, 29, -9, -29 },
{ 13, 42, -13, -42 },
{ 18, 60, -18, -60 },
{ 24, 80, -24, -80 },
{ 33, 106, -33, -106 },
{ 47, 183, -47, -183 }
};
const int64 g_table256[8][4] = {
{ 2*256, 8*256, -2*256, -8*256 },
{ 5*256, 17*256, -5*256, -17*256 },
{ 9*256, 29*256, -9*256, -29*256 },
{ 13*256, 42*256, -13*256, -42*256 },
{ 18*256, 60*256, -18*256, -60*256 },
{ 24*256, 80*256, -24*256, -80*256 },
{ 33*256, 106*256, -33*256, -106*256 },
{ 47*256, 183*256, -47*256, -183*256 }
};
const uint32 g_id[4][16] = {
{ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
{ 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
{ 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
};
const uint32 g_avg2[16] = {
0x00,
0x11,
0x22,
0x33,
0x44,
0x55,
0x66,
0x77,
0x88,
0x99,
0xAA,
0xBB,
0xCC,
0xDD,
0xEE,
0xFF
};
const uint32 g_flags[64] = {
0x80800402, 0x80800402, 0x80800402, 0x80800402,
0x80800402, 0x80800402, 0x80800402, 0x8080E002,
0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
0x80000402, 0x80000402, 0x80000402, 0x80000402,
0x80000402, 0x80000402, 0x80000402, 0x8000E002,
0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
0x00800402, 0x00800402, 0x00800402, 0x00800402,
0x00800402, 0x00800402, 0x00800402, 0x0080E002,
0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
0x00000402, 0x00000402, 0x00000402, 0x00000402,
0x00000402, 0x00000402, 0x00000402, 0x0000E002,
0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
};
#ifdef __SSE4_1__
const uint8 g_flags_AVX2[64] =
{
0x63, 0x63, 0x63, 0x63,
0x63, 0x63, 0x63, 0x7D,
0x63, 0x63, 0x7D, 0x7D,
0x63, 0x7D, 0x7D, 0x7D,
0x43, 0x43, 0x43, 0x43,
0x43, 0x43, 0x43, 0x5D,
0x43, 0x43, 0x5D, 0x5D,
0x43, 0x5D, 0x5D, 0x5D,
0x23, 0x23, 0x23, 0x23,
0x23, 0x23, 0x23, 0x3D,
0x23, 0x23, 0x3D, 0x3D,
0x23, 0x3D, 0x3D, 0x3D,
0x03, 0x03, 0x03, 0x03,
0x03, 0x03, 0x03, 0x1D,
0x03, 0x03, 0x1D, 0x1D,
0x03, 0x1D, 0x1D, 0x1D,
};
const __m128i g_table_SIMD[2] =
{
_mm_setr_epi16( 2, 5, 9, 13, 18, 24, 33, 47),
_mm_setr_epi16( 8, 17, 29, 42, 60, 80, 106, 183)
};
const __m128i g_table128_SIMD[2] =
{
_mm_setr_epi16( 2*128, 5*128, 9*128, 13*128, 18*128, 24*128, 33*128, 47*128),
_mm_setr_epi16( 8*128, 17*128, 29*128, 42*128, 60*128, 80*128, 106*128, 183*128)
};
const __m128i g_table256_SIMD[4] =
{
_mm_setr_epi32( 2*256, 5*256, 9*256, 13*256),
_mm_setr_epi32( 8*256, 17*256, 29*256, 42*256),
_mm_setr_epi32( 18*256, 24*256, 33*256, 47*256),
_mm_setr_epi32( 60*256, 80*256, 106*256, 183*256)
};
#endif

25
3rdparty/etc2/Tables.hpp vendored Normal file
View File

@@ -0,0 +1,25 @@
#ifndef __TABLES_HPP__
#define __TABLES_HPP__
#include "Types.hpp"
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
extern const int32 g_table[8][4];
extern const int64 g_table256[8][4];
extern const uint32 g_id[4][16];
extern const uint32 g_avg2[16];
extern const uint32 g_flags[64];
#ifdef __SSE4_1__
extern const uint8 g_flags_AVX2[64];
extern const __m128i g_table_SIMD[2];
extern const __m128i g_table128_SIMD[2];
extern const __m128i g_table256_SIMD[4];
#endif
#endif

17
3rdparty/etc2/Types.hpp vendored Normal file
View File

@@ -0,0 +1,17 @@
#ifndef __DARKRL__TYPES_HPP__
#define __DARKRL__TYPES_HPP__
#include <stdint.h>
typedef int8_t int8;
typedef uint8_t uint8;
typedef int16_t int16;
typedef uint16_t uint16;
typedef int32_t int32;
typedef uint32_t uint32;
typedef int64_t int64;
typedef uint64_t uint64;
typedef unsigned int uint;
#endif

222
3rdparty/etc2/Vector.hpp vendored Normal file
View File

@@ -0,0 +1,222 @@
#ifndef __DARKRL__VECTOR_HPP__
#define __DARKRL__VECTOR_HPP__
#include <assert.h>
#include <algorithm>
#include <math.h>
#include "Math.hpp"
#include "Types.hpp"
template<class T>
struct Vector2
{
Vector2() : x( 0 ), y( 0 ) {}
Vector2( T v ) : x( v ), y( v ) {}
Vector2( T _x, T _y ) : x( _x ), y( _y ) {}
bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }
bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
Vector2<T>& operator+=( const Vector2<T>& rhs )
{
x += rhs.x;
y += rhs.y;
return *this;
}
Vector2<T>& operator-=( const Vector2<T>& rhs )
{
x -= rhs.x;
y -= rhs.y;
return *this;
}
Vector2<T>& operator*=( const Vector2<T>& rhs )
{
x *= rhs.x;
y *= rhs.y;
return *this;
}
T x, y;
};
template<class T>
Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
{
return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
}
template<class T>
Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
{
return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
}
template<class T>
Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
{
return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
}
template<class T>
Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
{
return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
}
typedef Vector2<int32> v2i;
typedef Vector2<float> v2f;
template<class T>
struct Vector3
{
Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
Vector3( T v ) : x( v ), y( v ), z( v ) {}
Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
template<class Y>
Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}
T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
void Clamp()
{
x = std::min( T(1), std::max( T(0), x ) );
y = std::min( T(1), std::max( T(0), y ) );
z = std::min( T(1), std::max( T(0), z ) );
}
bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
T& operator[]( uint idx ) { assert( idx < 3 ); return ((T*)this)[idx]; }
const T& operator[]( uint idx ) const { assert( idx < 3 ); return ((T*)this)[idx]; }
Vector3<T> operator+=( const Vector3<T>& rhs )
{
x += rhs.x;
y += rhs.y;
z += rhs.z;
return *this;
}
Vector3<T> operator*=( const Vector3<T>& rhs )
{
x *= rhs.x;
y *= rhs.y;
z *= rhs.z;
return *this;
}
Vector3<T> operator*=( const float& rhs )
{
x *= rhs;
y *= rhs;
z *= rhs;
return *this;
}
T x, y, z;
T padding;
};
template<class T>
Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
}
template<class T>
Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
}
template<class T>
Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
}
template<class T>
Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
{
return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
}
template<class T>
Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
{
return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
}
template<class T>
bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
{
return lhs.Luminance() < rhs.Luminance();
}
typedef Vector3<int32> v3i;
typedef Vector3<float> v3f;
typedef Vector3<uint8> v3b;
static inline v3b v3f_to_v3b( const v3f& v )
{
return v3b( uint8( std::min( 1.f, v.x ) * 255 ), uint8( std::min( 1.f, v.y ) * 255 ), uint8( std::min( 1.f, v.z ) * 255 ) );
}
template<class T>
Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
{
return v1 + ( v2 - v1 ) * amount;
}
template<>
inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
{
return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );
}
template<class T>
Vector3<T> Desaturate( const Vector3<T>& v )
{
T l = v.Luminance();
return Vector3<T>( l, l, l );
}
template<class T>
Vector3<T> Desaturate( const Vector3<T>& v, float mul )
{
T l = T( v.Luminance() * mul );
return Vector3<T>( l, l, l );
}
template<class T>
Vector3<T> pow( const Vector3<T>& base, float exponent )
{
return Vector3<T>(
pow( base.x, exponent ),
pow( base.y, exponent ),
pow( base.z, exponent ) );
}
template<class T>
Vector3<T> sRGB2linear( const Vector3<T>& v )
{
return Vector3<T>(
sRGB2linear( v.x ),
sRGB2linear( v.y ),
sRGB2linear( v.z ) );
}
template<class T>
Vector3<T> linear2sRGB( const Vector3<T>& v )
{
return Vector3<T>(
linear2sRGB( v.x ),
linear2sRGB( v.y ),
linear2sRGB( v.z ) );
}
#endif
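A quick usage sketch of the helpers above (illustrative only, not part of the vendored file):
v3f a( 1.0f, 0.5f, 0.0f );
v3f b( 0.0f, 0.5f, 1.0f );
v3f mid = Mix( a, b, 0.5f );   // component-wise lerp -> (0.5f, 0.5f, 0.5f)
float l = mid.Luminance();     // 0.3*x + 0.59*y + 0.11*z
v3b q = v3f_to_v3b( mid );     // clamp to [0,1] and scale to the 0..255 byte range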

32
3rdparty/iqa/LICENSE vendored Normal file
View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

36
3rdparty/iqa/README.txt vendored Normal file
View File

@@ -0,0 +1,36 @@
Doxygen documentation can be found at: http://tdistler.com/iqa
BUILD:
All build artifacts end up in build/<configuration>, where <configuration> is
'debug' or 'release'.
Windows:
- Open iqa.sln, select 'Debug' or 'Release', and build. The output is a
static library 'iqa.lib'.
- To run the tests under the debugger, first right-click the 'test' project,
select Properties -> Configuration Properties -> Debugging and set
'Working Directory' to '$(OutDir)'. Then start the application.
Linux:
- Change directories into the root of the IQA branch you want to build.
- Type `make` for a debug build, or `make RELEASE=1` for a release build.
The output is a static library 'libiqa.a'.
- Type `make test` (or `make test RELEASE=1`) to build the unit tests.
- Type `make clean` (or `make clean RELEASE=1`) to delete all build
artifacts.
- To run the tests, `cd` to the build/<configuration> directory and type
`./test`.
USE:
- Include 'iqa.h' in your source file.
- Call iqa_* methods.
- Link against the IQA library.
HELP & SUPPORT:
Further help can be found at: https://sourceforge.net/projects/iqa/support
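EXAMPLE:
A minimal caller might look like this (a sketch, not from the library's
documentation; 'ref' and 'cmp' are two equal-sized 8-bit grayscale buffers,
and the prototypes are the ones declared in iqa.h):
  #include "iqa.h"
  /* stride == width here; pass the real byte stride if lines are padded */
  float psnr  = iqa_psnr(ref, cmp, w, h, w);
  /* 1 = 11x11 Gaussian window, 0 = default algorithm arguments */
  float mssim = iqa_ssim(ref, cmp, w, h, w, 1, 0);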

111
3rdparty/iqa/include/convolve.h vendored Normal file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _CONVOLVE_H_
#define _CONVOLVE_H_
typedef float (*_iqa_get_pixel)(const float *img, int w, int h, int x, int y, float bnd_const);
/** Out-of-bounds array values are a mirrored reflection of the border values*/
float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const);
/** Out-of-bounds array values are set to the nearest border value */
float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const);
/** Out-of-bounds array values are set to 'bnd_const' */
float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const);
/** Defines a convolution kernel */
struct _kernel {
float *kernel; /**< Pointer to the kernel values */
int w; /**< The kernel width */
int h; /**< The kernel height */
int normalized; /**< 1 if the kernel values add up to 1. 0 otherwise */
_iqa_get_pixel bnd_opt; /**< Defines how out-of-bounds image values are handled */
float bnd_const; /**< If 'bnd_opt' is KBND_CONSTANT, this specifies the out-of-bounds value */
};
/**
* @brief Applies the specified kernel to the image.
* The kernel will be applied to all areas where it fits completely within
* the image. The resulting image will therefore be smaller than the input:
* (w - kw + 1) by (h - kh + 1) pixels, where kw and kh are the kernel size.
*
* @param img Image to modify
* @param w Image width
* @param h Image height
* @param k The kernel to apply
* @param result Buffer to hold the resulting image ((w-kw+1)*(h-kh+1), where kw
* and kh are the kernel width and height). If 0, the result
* will be written to the original image buffer.
* @param rw Optional. The width of the resulting image will be stored here.
* @param rh Optional. The height of the resulting image will be stored here.
*/
void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh);
/**
* The same as _iqa_convolve() except the kernel is applied to the entire image.
* In other words, the kernel is applied to all areas where the top-left corner
* of the kernel is in the image. Out-of-bound pixel value (off the right and
* bottom edges) are chosen based on the 'bnd_opt' and 'bnd_const' members of
* the kernel structure. The resulting array is the same size as the input
* image.
*
* @param img Image to modify
* @param w Image width
* @param h Image height
* @param k The kernel to apply
* @param result Buffer to hold the resulting image (w*h, the same size as the
* input image). If 0, the result will be written to the
* original image buffer.
* @return 0 if successful. Non-zero otherwise.
*/
int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result);
/**
* Returns the filtered version of the specified pixel. If no kernel is given,
* the raw pixel value is returned.
*
* @param img Source image
* @param w Image width
* @param h Image height
* @param x The x location of the pixel to filter
* @param y The y location of the pixel to filter
* @param k Optional. The convolution kernel to apply to the pixel.
* @param kscale The scale of the kernel (for normalization). 1 for normalized
* kernels. Required if 'k' is not null.
* @return The filtered pixel value.
*/
float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale);
#endif /*_CONVOLVE_H_*/
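To make the contract above concrete, a hypothetical caller of _iqa_convolve could look like this (a sketch under the declarations above; box3 and blur are made-up names):
#include "convolve.h"
/* 3x3 box filter, already normalized so the taps sum to 1 */
static float box3[9] = {
    1/9.0f, 1/9.0f, 1/9.0f,
    1/9.0f, 1/9.0f, 1/9.0f,
    1/9.0f, 1/9.0f, 1/9.0f
};
void blur(float *img, int w, int h, float *out, int *rw, int *rh)
{
    struct _kernel k;
    k.kernel     = box3;
    k.w          = 3;
    k.h          = 3;
    k.normalized = 1;
    k.bnd_opt    = KBND_SYMMETRIC; /* only consulted by _iqa_img_filter */
    k.bnd_const  = 0.0f;
    /* result is (w-2) x (h-2); rw/rh receive the output size */
    _iqa_convolve(img, w, h, &k, out, rw, rh);
}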

55
3rdparty/iqa/include/decimate.h vendored Normal file
View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _DECIMATE_H_
#define _DECIMATE_H_
#include "convolve.h"
/**
* @brief Downsamples (decimates) an image.
*
* @param img Image to modify
* @param w Image width
* @param h Image height
* @param factor Decimation factor
* @param k The kernel to apply (e.g. low-pass filter). Can be 0.
* @param result Buffer to hold the resulting image (w/factor*h/factor). If 0,
* the result will be written to the original image buffer.
* @param rw Optional. The width of the resulting image will be stored here.
* @param rh Optional. The height of the resulting image will be stored here.
* @return 0 on success.
*/
int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh);
#endif /*_DECIMATE_H_*/

134
3rdparty/iqa/include/iqa.h vendored Normal file
View File

@@ -0,0 +1,134 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _IQA_H_
#define _IQA_H_
#include "iqa_os.h"
/**
* Allows fine-grain control of the SSIM algorithm.
*/
struct iqa_ssim_args {
float alpha; /**< luminance exponent */
float beta; /**< contrast exponent */
float gamma; /**< structure exponent */
int L; /**< dynamic range (2^8 - 1)*/
float K1; /**< stabilization constant 1 */
float K2; /**< stabilization constant 2 */
int f; /**< scale factor. 0=default scaling, 1=no scaling */
};
/**
* Allows fine-grain control of the MS-SSIM algorithm.
*/
struct iqa_ms_ssim_args {
int wang; /**< 1=original algorithm by Wang, et al. 0=MS-SSIM* by Rouse/Hemami (default). */
int gaussian; /**< 1=11x11 Gaussian window (default). 0=8x8 linear window. */
int scales; /**< Number of scaled images to use. Default is 5. */
const float *alphas; /**< Pointer to array of alpha values for each scale. Required if 'scales' isn't 5. */
const float *betas; /**< Pointer to array of beta values for each scale. Required if 'scales' isn't 5. */
const float *gammas; /**< Pointer to array of gamma values for each scale. Required if 'scales' isn't 5. */
};
/**
* Calculates the Mean Squared Error between 2 equal-sized 8-bit images.
* @note The images must have the same width, height, and stride.
* @param ref Original reference image
* @param cmp Distorted image
* @param w Width of the images
* @param h Height of the images
* @param stride The length (in bytes) of each horizontal line in the image.
* This may be different from the image width.
* @return The MSE.
*/
float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
/**
* Calculates the Peak Signal-to-Noise-Ratio between 2 equal-sized 8-bit
* images.
* @note The images must have the same width, height, and stride.
* @param ref Original reference image
* @param cmp Distorted image
* @param w Width of the images
* @param h Height of the images
* @param stride The length (in bytes) of each horizontal line in the image.
* This may be different from the image width.
* @return The PSNR.
*/
float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
/**
* Calculates the Structural SIMilarity between 2 equal-sized 8-bit images.
*
* See https://ece.uwaterloo.ca/~z70wang/publications/ssim.html
* @note The images must have the same width, height, and stride.
* @param ref Original reference image
* @param cmp Distorted image
* @param w Width of the images
* @param h Height of the images
* @param stride The length (in bytes) of each horizontal line in the image.
* This may be different from the image width.
* @param gaussian 0 = 8x8 square window, 1 = 11x11 circular-symmetric Gaussian
* weighting.
* @param args Optional SSIM arguments for fine control of the algorithm. 0 for
* defaults. Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
* @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
*/
float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride,
int gaussian, const struct iqa_ssim_args *args);
/**
* Calculates the Multi-Scale Structural SIMilarity between 2 equal-sized 8-bit
* images. The default algorithm is MS-SSIM* proposed by Rouse/Hemami 2008.
*
* See https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf and
* http://foulard.ece.cornell.edu/publications/dmr_hvei2008_paper.pdf
*
* @note 1. The images must have the same width, height, and stride.
* @note 2. The minimum image width or height is 2^(scales-1) * filter, where 'filter' is 11
* if a Gaussian window is being used, or 9 otherwise.
* @param ref Original reference image
* @param cmp Distorted image
* @param w Width of the images.
* @param h Height of the images.
* @param stride The length (in bytes) of each horizontal line in the image.
* This may be different from the image width.
* @param args Optional MS-SSIM arguments for fine control of the algorithm. 0
* for defaults. Defaults are wang=0, scales=5, gaussian=1.
* @return The mean MS-SSIM over the entire image, or INFINITY if error.
*/
float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride,
const struct iqa_ms_ssim_args *args);
#endif /*_IQA_H_*/

68
3rdparty/iqa/include/iqa_os.h vendored Normal file
View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _OS_H_
#define _OS_H_
/* Microsoft tends to implement features early, but they have a high legacy
* cost because they won't break existing implementations. As such, certain
* features we take for granted on other platforms (like C99) aren't fully
* implemented. This file is meant to rectify that.
*/
#ifdef WIN32
#include <windows.h>
#include <math.h>
#define IQA_INLINE __inline
#ifndef INFINITY
#define INFINITY (float)HUGE_VAL /**< Defined in C99 (Windows is C89) */
#endif /*INFINITY*/
#ifndef NAN
static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
#define NAN (*(const float *) __nan) /**< Defined in C99 (Windows is C89) */
#endif
#define IQA_EXPORT __declspec(dllexport)
#else /* !Windows */
#define IQA_INLINE inline
#define IQA_EXPORT
#endif
#endif /* _OS_H_ */

64
3rdparty/iqa/include/math_utils.h vendored Normal file
View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MATH_UTILS_H_
#define _MATH_UTILS_H_
#include "iqa_os.h"
#include <math.h>
/**
* Rounds a float to the nearest integer.
*/
IQA_EXPORT int _round(float a);
IQA_EXPORT int _max(int x, int y);
IQA_EXPORT int _min(int x, int y);
/**
* Compares 2 floats to the specified digit of precision.
* @return 0 if equal, 1 otherwise.
*/
IQA_EXPORT int _cmp_float(float a, float b, int digits);
/**
* Compares 2 matrices with the specified precision. 'b' is assumed to be the
* same size as 'a' or smaller.
* @return 0 if equal, 1 otherwise
*/
IQA_EXPORT int _matrix_cmp(const float *a, const float *b, int w, int h, int digits);
#endif /*_MATH_UTILS_H_*/
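For illustration, assuming the documented round-at-the-requested-digit semantics (a sketch, not from the library):
/* both round to 1.234 at 3 digits of precision -> equal -> 0 */
int same = _cmp_float(1.2341f, 1.2342f, 3);
/* differ in the 3rd digit -> 1 */
int diff = _cmp_float(1.234f, 1.236f, 3);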

117
3rdparty/iqa/include/ssim.h vendored Normal file
View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SSIM_H_
#define _SSIM_H_
#include "convolve.h"
/*
* Circular-symmetric Gaussian weighting.
* h(x,y) = hg(x,y)/SUM(SUM(hg)) , for normalization to 1.0
* hg(x,y) = e^( -0.5*( (x^2+y^2)/sigma^2 ) ) , where sigma was 1.5
*/
#define GAUSSIAN_LEN 11
static const float g_gaussian_window[GAUSSIAN_LEN][GAUSSIAN_LEN] = {
{0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
{0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
{0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
{0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
{0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
{0.000274f, 0.002021f, 0.009577f, 0.029091f, 0.056662f, 0.070762f, 0.056662f, 0.029091f, 0.009577f, 0.002021f, 0.000274f},
{0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
{0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
{0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
{0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
{0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
};
/*
* Equal weight square window.
* Each pixel is equally weighted (1/64) so that SUM(x) = 1.0
*/
#define SQUARE_LEN 8
static const float g_square_window[SQUARE_LEN][SQUARE_LEN] = {
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
{0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
};
/* Holds intermediate SSIM values for map-reduce operation. */
struct _ssim_int {
double l;
double c;
double s;
};
/* Defines the pointers to the map-reduce functions. */
typedef int (*_map)(const struct _ssim_int *, void *);
typedef float (*_reduce)(int, int, void *);
/* Arguments for map-reduce. The 'context' is user-defined. */
struct _map_reduce {
_map map;
_reduce reduce;
void *context;
};
/**
* Private method that calculates the SSIM value on a pre-processed image.
*
* The input images must have stride==width. This method does not scale.
*
* @note Image buffers are modified.
*
* Map-reduce is used for doing the final SSIM calculation. The map function is
* called for every pixel, and the reduce is called at the end. The context is
* caller-defined and *not* modified by this method.
*
* @param ref Original reference image
* @param cmp Distorted image
* @param w Width of the images
* @param h Height of the images
* @param k The kernel used as the window function
* @param mr Optional map-reduce functions to use to calculate SSIM. Required
* if 'args' is not null. Ignored if 'args' is null.
* @param args Optional SSIM arguments for fine control of the algorithm. 0 for defaults.
* Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
* @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
*/
float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args);
#endif /* _SSIM_H_ */

195
3rdparty/iqa/source/convolve.c vendored Normal file
View File

@@ -0,0 +1,195 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "convolve.h"
#include <stdlib.h>
float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const)
{
(void)bnd_const;
if (x<0) x=-1-x;
else if (x>=w) x=(w-(x-w))-1;
if (y<0) y=-1-y;
else if (y>=h) y=(h-(y-h))-1;
return img[y*w + x];
}
float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const)
{
(void)bnd_const;
if (x<0) x=0;
if (x>=w) x=w-1;
if (y<0) y=0;
if (y>=h) y=h-1;
return img[y*w + x];
}
float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const)
{
if (x<0) x=0;
if (y<0) y=0;
if (x>=w || y>=h)
return bnd_const;
return img[y*w + x];
}
static float _calc_scale(const struct _kernel *k)
{
int ii,k_len;
double sum=0.0;
if (k->normalized)
return 1.0f;
else {
k_len = k->w * k->h;
for (ii=0; ii<k_len; ++ii)
sum += k->kernel[ii];
if (sum != 0.0)
return (float)(1.0 / sum);
return 1.0f;
}
}
void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh)
{
int x,y,kx,ky,u,v;
int uc = k->w/2;
int vc = k->h/2;
int kw_even = (k->w&1)?0:1;
int kh_even = (k->h&1)?0:1;
int dst_w = w - k->w + 1;
int dst_h = h - k->h + 1;
int img_offset,k_offset;
double sum;
float scale, *dst=result;
if (!dst)
dst = img; /* Convolve in-place */
/* Kernel is applied to all positions where the kernel is fully contained
* in the image */
scale = _calc_scale(k);
for (y=0; y < dst_h; ++y) {
for (x=0; x < dst_w; ++x) {
sum = 0.0;
k_offset = 0;
ky = y+vc;
kx = x+uc;
for (v=-vc; v <= vc-kh_even; ++v) {
img_offset = (ky+v)*w + kx;
for (u=-uc; u <= uc-kw_even; ++u, ++k_offset) {
sum += img[img_offset+u] * k->kernel[k_offset];
}
}
dst[y*dst_w + x] = (float)(sum * scale);
}
}
if (rw) *rw = dst_w;
if (rh) *rh = dst_h;
}
int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result)
{
int x,y;
int img_offset;
float scale, *dst=result;
if (!k || !k->bnd_opt)
return 1;
if (!dst) {
dst = (float*)malloc(w*h*sizeof(float));
if (!dst)
return 2;
}
scale = _calc_scale(k);
/* Kernel is applied to all positions where top-left corner is in the image */
for (y=0; y < h; ++y) {
for (x=0; x < w; ++x) {
dst[y*w + x] = _iqa_filter_pixel(img, w, h, x, y, k, scale);
}
}
/* If no result buffer given, copy results to image buffer */
if (!result) {
for (y=0; y<h; ++y) {
img_offset = y*w;
for (x=0; x<w; ++x, ++img_offset) {
img[img_offset] = dst[img_offset];
}
}
free(dst);
}
return 0;
}
float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale)
{
int u,v,uc,vc;
int kw_even,kh_even;
int x_edge_left,x_edge_right,y_edge_top,y_edge_bottom;
int edge,img_offset,k_offset;
double sum;
if (!k)
return img[y*w + x];
uc = k->w/2;
vc = k->h/2;
kw_even = (k->w&1)?0:1;
kh_even = (k->h&1)?0:1;
x_edge_left = uc;
x_edge_right = w-uc;
y_edge_top = vc;
y_edge_bottom = h-vc;
edge = 0;
if (x < x_edge_left || y < y_edge_top || x >= x_edge_right || y >= y_edge_bottom)
edge = 1;
sum = 0.0;
k_offset = 0;
for (v=-vc; v <= vc-kh_even; ++v) {
img_offset = (y+v)*w + x;
for (u=-uc; u <= uc-kw_even; ++u, ++k_offset) {
if (!edge)
sum += img[img_offset+u] * k->kernel[k_offset];
else
sum += k->bnd_opt(img, w, h, x+u, y+v, k->bnd_const) * k->kernel[k_offset];
}
}
return (float)(sum * kscale);
}

59
3rdparty/iqa/source/decimate.c vendored Normal file
View File

@@ -0,0 +1,59 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "decimate.h"
#include <stdlib.h>
int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh)
{
int x,y;
int sw = w/factor + (w&1);
int sh = h/factor + (h&1);
int dst_offset;
float *dst=img;
if (result)
dst = result;
/* Downsample */
for (y=0; y<sh; ++y) {
dst_offset = y*sw;
for (x=0; x<sw; ++x,++dst_offset) {
dst[dst_offset] = _iqa_filter_pixel(img, w, h, x*factor, y*factor, k, 1.0f);
}
}
if (rw) *rw = sw;
if (rh) *rh = sh;
return 0;
}
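The filter is evaluated at every factor-th pixel, so a w x h image shrinks to roughly w/factor x h/factor, with the (w&1) term keeping an extra row or column for odd sizes. A quick standalone sketch of how the dimensions evolve across repeated factor-2 decimation (mirroring the cur_w/cur_h updates in the MS-SSIM code; no iqa calls):

#include <cstdio>

int main()
{
    /* Mirror of the per-scale size update: cur = cur/2 + (cur & 1). */
    int w = 640, h = 353;
    for (int scale = 0; scale < 5; ++scale) {
        printf("scale %d: %d x %d\n", scale, w, h);
        w = w / 2 + (w & 1);
        h = h / 2 + (h & 1);
    }
    return 0; /* prints 640x353, 320x177, 160x89, 80x45, 40x23 */
}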

82
3rdparty/iqa/source/math_utils.c vendored Normal file

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "math_utils.h"
#include <math.h>
/* Rounds to nearest, with halves away from zero. Note: only correct for
 * non-negative inputs (a negative fractional part never reaches 0.5),
 * which is all this library passes in. */
int _round(float a)
{
int sign_a = a > 0.0f ? 1 : -1;
return a-(int)a >= 0.5 ? (int)a + sign_a : (int)a;
}
int _max(int x, int y)
{
return x >= y ? x : y;
}
int _min(int x, int y)
{
return x <= y ? x : y;
}
int _cmp_float(float a, float b, int digits)
{
/* Round */
int sign_a = a > 0.0f ? 1 : -1;
int sign_b = b > 0.0f ? 1 : -1;
double scale = pow(10.0, (double)digits);
double ax = a * scale;
double bx = b * scale;
int ai = ax-(int)ax >= 0.5 ? (int)ax + sign_a : (int)ax;
int bi = bx-(int)bx >= 0.5 ? (int)bx + sign_b : (int)bx;
/* Compare */
return ai == bi ? 0 : 1;
}
int _matrix_cmp(const float *a, const float *b, int w, int h, int digits)
{
int offset;
int result=0;
int len=w*h;
for (offset=0; offset<len; ++offset) {
if (_cmp_float(a[offset], b[offset], digits)) {
result = 1;
break;
}
}
return result;
}
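_cmp_float treats two floats as equal when they agree after rounding to the given number of decimal digits: both values are scaled by 10^digits, rounded away from zero, and the resulting integers compared (0 means equal). A hypothetical standalone equivalent, using std::lround in place of the manual rounding above:

#include <cmath>
#include <cstdio>

/* Same idea as _cmp_float: equal iff the values agree to `digits` decimals. */
static int cmp_to_digits(float a, float b, int digits)
{
    double scale = std::pow(10.0, (double)digits);
    long ai = std::lround((double)a * scale);
    long bi = std::lround((double)b * scale);
    return ai == bi ? 0 : 1;
}

int main()
{
    printf("%d\n", cmp_to_digits(0.33333f, 0.33334f, 4)); /* 0: equal to 4 digits */
    printf("%d\n", cmp_to_digits(0.33333f, 0.33334f, 5)); /* 1: differ at the 5th */
    return 0;
}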

277
3rdparty/iqa/source/ms_ssim.c vendored Normal file

@@ -0,0 +1,277 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "iqa.h"
#include "ssim.h"
#include "decimate.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
/* Default number of scales */
#define SCALES 5
/* Low-pass filter for down-sampling (9/7 biorthogonal wavelet filter) */
#define LPF_LEN 9
static const float g_lpf[LPF_LEN][LPF_LEN] = {
{ 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
{-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
{-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
{ 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
{ 0.016114f,-0.010146f,-0.047149f, 0.160885f, 0.363505f, 0.160885f,-0.047149f,-0.010146f, 0.016114f},
{ 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
{-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
{-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
{ 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
};
/* Alpha, beta, and gamma values for each scale */
static float g_alphas[] = { 0.0000f, 0.0000f, 0.0000f, 0.0000f, 0.1333f };
static float g_betas[] = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
static float g_gammas[] = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
struct _context {
double l; /* Luminance */
double c; /* Contrast */
double s; /* Structure */
float alpha;
float beta;
float gamma;
};
/* Called for each pixel */
int _ms_ssim_map(const struct _ssim_int *si, void *ctx)
{
struct _context *ms_ctx = (struct _context*)ctx;
ms_ctx->l += si->l;
ms_ctx->c += si->c;
ms_ctx->s += si->s;
return 0;
}
/* Called to calculate the final result */
float _ms_ssim_reduce(int w, int h, void *ctx)
{
double size = (double)(w*h);
struct _context *ms_ctx = (struct _context*)ctx;
ms_ctx->l = pow(ms_ctx->l / size, (double)ms_ctx->alpha);
ms_ctx->c = pow(ms_ctx->c / size, (double)ms_ctx->beta);
ms_ctx->s = pow(fabs(ms_ctx->s / size), (double)ms_ctx->gamma);
return (float)(ms_ctx->l * ms_ctx->c * ms_ctx->s);
}
/* Releases the scaled buffers */
void _free_buffers(float **buf, int scales)
{
int idx;
for (idx=0; idx<scales; ++idx)
free(buf[idx]);
}
/* Allocates the scaled buffers. On error, any buffers already allocated are freed */
int _alloc_buffers(float **buf, int w, int h, int scales)
{
int idx;
int cur_w = w;
int cur_h = h;
for (idx=0; idx<scales; ++idx) {
buf[idx] = (float*)malloc(cur_w*cur_h*sizeof(float));
if (!buf[idx]) {
_free_buffers(buf, idx);
return 1;
}
cur_w = cur_w/2 + (cur_w&1);
cur_h = cur_h/2 + (cur_h&1);
}
return 0;
}
/*
 * MS_SSIM(X,Y) = LM(x,y)^aM * MULT[j=1->M]( Cj(x,y)^bj * Sj(x,y)^gj )
 * where,
 *  L = luminance comparison (based on the means)
 *  C = contrast comparison (based on the variances)
 *  S = structure comparison (based on the cross-correlation)
 *
 * b1=g1=0.0448, b2=g2=0.2856, b3=g3=0.3001, b4=g4=0.2363, a5=b5=g5=0.1333
 */
float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h,
int stride, const struct iqa_ms_ssim_args *args)
{
int wang=0;
int scales=SCALES;
int gauss=1;
const float *alphas=g_alphas, *betas=g_betas, *gammas=g_gammas;
int idx,x,y,cur_w,cur_h;
int offset,src_offset;
float **ref_imgs, **cmp_imgs; /* Array of pointers to scaled images */
float msssim;
struct _kernel lpf, window;
struct iqa_ssim_args s_args;
struct _map_reduce mr;
struct _context ms_ctx;
if (args) {
wang = args->wang;
gauss = args->gaussian;
scales = args->scales;
if (args->alphas)
alphas = args->alphas;
if (args->betas)
betas = args->betas;
if (args->gammas)
gammas = args->gammas;
}
/* Make sure we won't scale below 1x1 */
cur_w = w;
cur_h = h;
for (idx=0; idx<scales; ++idx) {
if ( gauss ? cur_w<GAUSSIAN_LEN || cur_h<GAUSSIAN_LEN : cur_w<LPF_LEN || cur_h<LPF_LEN )
return INFINITY;
cur_w /= 2;
cur_h /= 2;
}
window.kernel = (float*)g_square_window;
window.w = window.h = SQUARE_LEN;
window.normalized = 1;
window.bnd_opt = KBND_SYMMETRIC;
if (gauss) {
window.kernel = (float*)g_gaussian_window;
window.w = window.h = GAUSSIAN_LEN;
}
mr.map = _ms_ssim_map;
mr.reduce = _ms_ssim_reduce;
/* Allocate the scaled image buffers */
ref_imgs = (float**)malloc(scales*sizeof(float*));
cmp_imgs = (float**)malloc(scales*sizeof(float*));
if (!ref_imgs || !cmp_imgs) {
if (ref_imgs) free(ref_imgs);
if (cmp_imgs) free(cmp_imgs);
return INFINITY;
}
if (_alloc_buffers(ref_imgs, w, h, scales)) {
free(ref_imgs);
free(cmp_imgs);
return INFINITY;
}
if (_alloc_buffers(cmp_imgs, w, h, scales)) {
_free_buffers(ref_imgs, scales);
free(ref_imgs);
free(cmp_imgs);
return INFINITY;
}
/* Copy original images into first scale buffer, forcing stride = width. */
for (y=0; y<h; ++y) {
src_offset = y*stride;
offset = y*w;
for (x=0; x<w; ++x, ++offset, ++src_offset) {
ref_imgs[0][offset] = (float)ref[src_offset];
cmp_imgs[0][offset] = (float)cmp[src_offset];
}
}
/* Create scaled versions of the images */
cur_w=w;
cur_h=h;
lpf.kernel = (float*)g_lpf;
lpf.w = lpf.h = LPF_LEN;
lpf.normalized = 1;
lpf.bnd_opt = KBND_SYMMETRIC;
for (idx=1; idx<scales; ++idx) {
if (_iqa_decimate(ref_imgs[idx-1], cur_w, cur_h, 2, &lpf, ref_imgs[idx], 0, 0) ||
_iqa_decimate(cmp_imgs[idx-1], cur_w, cur_h, 2, &lpf, cmp_imgs[idx], &cur_w, &cur_h))
{
_free_buffers(ref_imgs, scales);
_free_buffers(cmp_imgs, scales);
free(ref_imgs);
free(cmp_imgs);
return INFINITY;
}
}
cur_w=w;
cur_h=h;
msssim = 1.0;
for (idx=0; idx<scales; ++idx) {
ms_ctx.l = 0;
ms_ctx.c = 0;
ms_ctx.s = 0;
ms_ctx.alpha = alphas[idx];
ms_ctx.beta = betas[idx];
ms_ctx.gamma = gammas[idx];
if (!wang) {
/* MS-SSIM* (Rouse/Hemami) */
s_args.alpha = 1.0f;
s_args.beta = 1.0f;
s_args.gamma = 1.0f;
s_args.K1 = 0.0f; /* Force stabilization constants to 0 */
s_args.K2 = 0.0f;
s_args.L = 255;
s_args.f = 1; /* Don't resize */
mr.context = &ms_ctx;
msssim *= _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
}
else {
/* MS-SSIM (Wang) */
s_args.alpha = 1.0f;
s_args.beta = 1.0f;
s_args.gamma = 1.0f;
s_args.K1 = 0.01f;
s_args.K2 = 0.03f;
s_args.L = 255;
s_args.f = 1; /* Don't resize */
mr.context = &ms_ctx;
msssim *= _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
}
if (msssim == INFINITY)
break;
cur_w = cur_w/2 + (cur_w&1);
cur_h = cur_h/2 + (cur_h&1);
}
_free_buffers(ref_imgs, scales);
_free_buffers(cmp_imgs, scales);
free(ref_imgs);
free(cmp_imgs);
return msssim;
}
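A typical call passes a null args pointer to get the defaults seen above (5 scales, the Gaussian window, and the Rouse/Hemami MS-SSIM* weighting). The buffers below are illustrative; note that the image must stay at least window-sized across all scales or the function returns INFINITY, which 256x256 comfortably satisfies:

#include <cstdio>
#include <cstring>
#include "iqa.h" /* assumes the iqa headers are on the include path */

int main()
{
    enum { W = 256, H = 256 };
    static unsigned char ref[W * H], cmp[W * H];
    memset(ref, 128, sizeof(ref));
    memcpy(cmp, ref, sizeof(cmp));
    cmp[0] = 120; /* a single-pixel distortion */

    /* stride is bytes per row; args = 0 selects the defaults */
    float score = iqa_ms_ssim(ref, cmp, W, H, W, 0);
    printf("MS-SSIM = %f\n", score);
    return 0;
}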

50
3rdparty/iqa/source/mse.c vendored Normal file

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "iqa.h"
/* MSE(a,b) = 1/N * SUM((a-b)^2) */
float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
{
int error, offset;
unsigned long long sum=0;
int ww,hh;
for (hh=0; hh<h; ++hh) {
offset = hh*stride;
for (ww=0; ww<w; ++ww, ++offset) {
error = ref[offset] - cmp[offset];
sum += error * error;
}
}
return (float)( (double)sum / (double)(w*h) );
}
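A worked call: for the 2x2 buffers below the per-pixel errors are -1, 0, 2, and -4, so MSE = (1 + 0 + 4 + 16)/4 = 5.25. (Buffer contents are illustrative.)

#include <cstdio>
#include "iqa.h" /* assumes the iqa headers are on the include path */

int main()
{
    const unsigned char ref[4] = { 10, 20, 30, 40 };
    const unsigned char cmp[4] = { 11, 20, 28, 44 };
    /* errors -1, 0, 2, -4 -> squared 1, 0, 4, 16 -> mean 5.25 */
    printf("MSE = %f\n", iqa_mse(ref, cmp, 2, 2, 2));
    return 0;
}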

42
3rdparty/iqa/source/psnr.c vendored Normal file

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "iqa.h"
#include <math.h>
/* PSNR(a,b) = 10*log10(L^2 / MSE(a,b)), where L = 2^bitdepth - 1 (for 8-bit images, L = 255) */
float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
{
const int L_sqd = 255 * 255;
return (float)( 10.0 * log10( L_sqd / iqa_mse(ref,cmp,w,h,stride) ) );
}
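Continuing the MSE example: MSE = 5.25 and L = 255 give PSNR = 10*log10(65025/5.25), about 40.93 dB. Identical images yield MSE = 0, so the quotient (and the reported PSNR) diverges to infinity. The same pair through the API:

#include <cstdio>
#include "iqa.h" /* assumes the iqa headers are on the include path */

int main()
{
    const unsigned char ref[4] = { 10, 20, 30, 40 };
    const unsigned char cmp[4] = { 11, 20, 28, 44 };
    /* MSE = 5.25 -> PSNR = 10*log10(255^2 / 5.25) ~= 40.93 dB */
    printf("PSNR = %f dB\n", iqa_psnr(ref, cmp, 2, 2, 2));
    return 0;
}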

322
3rdparty/iqa/source/ssim.c vendored Normal file

@@ -0,0 +1,322 @@
/*
* Copyright (c) 2011, Tom Distler (http://tdistler.com)
* All rights reserved.
*
* The BSD License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the tdistler.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "iqa.h"
#include "convolve.h"
#include "decimate.h"
#include "math_utils.h"
#include "ssim.h"
#include <stdlib.h>
#include <math.h>
/* Forward declarations. */
IQA_INLINE static double _calc_luminance(float, float, float, float);
IQA_INLINE static double _calc_contrast(double, float, float, float, float);
IQA_INLINE static double _calc_structure(float, double, float, float, float, float);
static int _ssim_map(const struct _ssim_int *, void *);
static float _ssim_reduce(int, int, void *);
/*
 * SSIM(x,y) = ((2*ux*uy + C1)*(2*sxy + C2)) / ((ux^2 + uy^2 + C1)*(sx^2 + sy^2 + C2))
 * where,
 *  ux = SUM(w*x)
 *  sx = (SUM(w*(x-ux)^2))^0.5
 *  sxy = SUM(w*(x-ux)*(y-uy))
 *
 * Returns the mean SSIM. MSSIM(X,Y) = 1/M * SUM(SSIM(x,y))
 */
float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride,
int gaussian, const struct iqa_ssim_args *args)
{
int scale;
int x,y,src_offset,offset;
float *ref_f,*cmp_f;
struct _kernel low_pass;
struct _kernel window;
float result;
double ssim_sum=0.0;
struct _map_reduce mr;
/* Initialize algorithm parameters */
scale = _max( 1, _round( (float)_min(w,h) / 256.0f ) );
if (args) {
if(args->f)
scale = args->f;
mr.map = _ssim_map;
mr.reduce = _ssim_reduce;
mr.context = (void*)&ssim_sum;
}
window.kernel = (float*)g_square_window;
window.w = window.h = SQUARE_LEN;
window.normalized = 1;
window.bnd_opt = KBND_SYMMETRIC;
if (gaussian) {
window.kernel = (float*)g_gaussian_window;
window.w = window.h = GAUSSIAN_LEN;
}
/* Convert image values to floats. Forcing stride = width. */
ref_f = (float*)malloc(w*h*sizeof(float));
cmp_f = (float*)malloc(w*h*sizeof(float));
if (!ref_f || !cmp_f) {
if (ref_f) free(ref_f);
if (cmp_f) free(cmp_f);
return INFINITY;
}
for (y=0; y<h; ++y) {
src_offset = y*stride;
offset = y*w;
for (x=0; x<w; ++x, ++offset, ++src_offset) {
ref_f[offset] = (float)ref[src_offset];
cmp_f[offset] = (float)cmp[src_offset];
}
}
/* Scale the images down if required */
if (scale > 1) {
/* Generate simple low-pass filter */
low_pass.kernel = (float*)malloc(scale*scale*sizeof(float));
if (!low_pass.kernel) {
free(ref_f);
free(cmp_f);
return INFINITY;
}
low_pass.w = low_pass.h = scale;
low_pass.normalized = 0;
low_pass.bnd_opt = KBND_SYMMETRIC;
for (offset=0; offset<scale*scale; ++offset)
low_pass.kernel[offset] = 1.0f/(scale*scale);
/* Resample */
if (_iqa_decimate(ref_f, w, h, scale, &low_pass, 0, 0, 0) ||
_iqa_decimate(cmp_f, w, h, scale, &low_pass, 0, &w, &h)) { /* Update w/h */
free(ref_f);
free(cmp_f);
free(low_pass.kernel);
return INFINITY;
}
free(low_pass.kernel);
}
result = _iqa_ssim(ref_f, cmp_f, w, h, &window, &mr, args);
free(ref_f);
free(cmp_f);
return result;
}
/* _iqa_ssim */
float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args)
{
float alpha=1.0f, beta=1.0f, gamma=1.0f;
int L=255;
float K1=0.01f, K2=0.03f;
float C1,C2,C3;
int x,y,offset;
float *ref_mu,*cmp_mu,*ref_sigma_sqd,*cmp_sigma_sqd,*sigma_both;
double ssim_sum, numerator, denominator;
double luminance_comp, contrast_comp, structure_comp, sigma_root;
struct _ssim_int sint;
/* Initialize algorithm parameters */
if (args) {
if (!mr)
return INFINITY;
alpha = args->alpha;
beta = args->beta;
gamma = args->gamma;
L = args->L;
K1 = args->K1;
K2 = args->K2;
}
C1 = (K1*L)*(K1*L);
C2 = (K2*L)*(K2*L);
C3 = C2 / 2.0f;
ref_mu = (float*)malloc(w*h*sizeof(float));
cmp_mu = (float*)malloc(w*h*sizeof(float));
ref_sigma_sqd = (float*)malloc(w*h*sizeof(float));
cmp_sigma_sqd = (float*)malloc(w*h*sizeof(float));
sigma_both = (float*)malloc(w*h*sizeof(float));
if (!ref_mu || !cmp_mu || !ref_sigma_sqd || !cmp_sigma_sqd || !sigma_both) {
if (ref_mu) free(ref_mu);
if (cmp_mu) free(cmp_mu);
if (ref_sigma_sqd) free(ref_sigma_sqd);
if (cmp_sigma_sqd) free(cmp_sigma_sqd);
if (sigma_both) free(sigma_both);
return INFINITY;
}
/* Calculate mean */
_iqa_convolve(ref, w, h, k, ref_mu, 0, 0);
_iqa_convolve(cmp, w, h, k, cmp_mu, 0, 0);
for (y=0; y<h; ++y) {
offset = y*w;
for (x=0; x<w; ++x, ++offset) {
ref_sigma_sqd[offset] = ref[offset] * ref[offset];
cmp_sigma_sqd[offset] = cmp[offset] * cmp[offset];
sigma_both[offset] = ref[offset] * cmp[offset];
}
}
/* Calculate sigma */
_iqa_convolve(ref_sigma_sqd, w, h, k, 0, 0, 0);
_iqa_convolve(cmp_sigma_sqd, w, h, k, 0, 0, 0);
_iqa_convolve(sigma_both, w, h, k, 0, &w, &h); /* Update the width and height */
/* The convolution results are smaller than the input by kernel size - 1 in each dimension */
for (y=0; y<h; ++y) {
offset = y*w;
for (x=0; x<w; ++x, ++offset) {
ref_sigma_sqd[offset] -= ref_mu[offset] * ref_mu[offset];
cmp_sigma_sqd[offset] -= cmp_mu[offset] * cmp_mu[offset];
sigma_both[offset] -= ref_mu[offset] * cmp_mu[offset];
}
}
ssim_sum = 0.0;
for (y=0; y<h; ++y) {
offset = y*w;
for (x=0; x<w; ++x, ++offset) {
if (!args) {
/* The default case */
numerator = (2.0 * ref_mu[offset] * cmp_mu[offset] + C1) * (2.0 * sigma_both[offset] + C2);
denominator = (ref_mu[offset]*ref_mu[offset] + cmp_mu[offset]*cmp_mu[offset] + C1) *
(ref_sigma_sqd[offset] + cmp_sigma_sqd[offset] + C2);
ssim_sum += numerator / denominator;
}
else {
/* User tweaked alpha, beta, or gamma */
/* passing a negative number to sqrt() causes a domain error */
if (ref_sigma_sqd[offset] < 0.0f)
ref_sigma_sqd[offset] = 0.0f;
if (cmp_sigma_sqd[offset] < 0.0f)
cmp_sigma_sqd[offset] = 0.0f;
sigma_root = sqrt(ref_sigma_sqd[offset] * cmp_sigma_sqd[offset]);
luminance_comp = _calc_luminance(ref_mu[offset], cmp_mu[offset], C1, alpha);
contrast_comp = _calc_contrast(sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C2, beta);
structure_comp = _calc_structure(sigma_both[offset], sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C3, gamma);
sint.l = luminance_comp;
sint.c = contrast_comp;
sint.s = structure_comp;
if (mr->map(&sint, mr->context)) {
/* don't leak the intermediate buffers on a map failure */
free(ref_mu);
free(cmp_mu);
free(ref_sigma_sqd);
free(cmp_sigma_sqd);
free(sigma_both);
return INFINITY;
}
}
}
}
free(ref_mu);
free(cmp_mu);
free(ref_sigma_sqd);
free(cmp_sigma_sqd);
free(sigma_both);
if (!args)
return (float)(ssim_sum / (double)(w*h));
return mr->reduce(w, h, mr->context);
}
/* _ssim_map */
int _ssim_map(const struct _ssim_int *si, void *ctx)
{
double *ssim_sum = (double*)ctx;
*ssim_sum += si->l * si->c * si->s;
return 0;
}
/* _ssim_reduce */
float _ssim_reduce(int w, int h, void *ctx)
{
double *ssim_sum = (double*)ctx;
return (float)(*ssim_sum / (double)(w*h));
}
/* _calc_luminance */
IQA_INLINE static double _calc_luminance(float mu1, float mu2, float C1, float alpha)
{
double result;
float sign;
/* For MS-SSIM* */
if (C1 == 0 && mu1*mu1 == 0 && mu2*mu2 == 0)
return 1.0;
result = (2.0 * mu1 * mu2 + C1) / (mu1*mu1 + mu2*mu2 + C1);
if (alpha == 1.0f)
return result;
sign = result < 0.0 ? -1.0f : 1.0f;
return sign * pow(fabs(result),(double)alpha);
}
/* _calc_contrast */
IQA_INLINE static double _calc_contrast(double sigma_comb_12, float sigma1_sqd, float sigma2_sqd, float C2, float beta)
{
double result;
float sign;
/* For MS-SSIM* */
if (C2 == 0 && sigma1_sqd + sigma2_sqd == 0)
return 1.0;
result = (2.0 * sigma_comb_12 + C2) / (sigma1_sqd + sigma2_sqd + C2);
if (beta == 1.0f)
return result;
sign = result < 0.0 ? -1.0f : 1.0f;
return sign * pow(fabs(result),(double)beta);
}
/* _calc_structure */
IQA_INLINE static double _calc_structure(float sigma_12, double sigma_comb_12, float sigma1, float sigma2, float C3, float gamma)
{
double result;
float sign;
/* For MS-SSIM* */
if (C3 == 0 && sigma_comb_12 == 0) {
if (sigma1 == 0 && sigma2 == 0)
return 1.0;
else if (sigma1 == 0 || sigma2 == 0)
return 0.0;
}
result = (sigma_12 + C3) / (sigma_comb_12 + C3);
if (gamma == 1.0f)
return result;
sign = result < 0.0 ? -1.0f : 1.0f;
return sign * pow(fabs(result),(double)gamma);
}
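A typical call of the public entry point: gaussian=1 selects the Gaussian window, and args=0 keeps the standard constants (K1=0.01, K2=0.03, L=255) and the automatic downsampling. Buffers are illustrative:

#include <cstdio>
#include <cstring>
#include "iqa.h" /* assumes the iqa headers are on the include path */

int main()
{
    enum { W = 128, H = 128 };
    static unsigned char ref[W * H], cmp[W * H];
    for (int i = 0; i < W * H; ++i)
        ref[i] = (unsigned char)(i & 0xff);
    memcpy(cmp, ref, sizeof(cmp));
    cmp[W + 1] ^= 0x10; /* perturb a single pixel */

    float score = iqa_ssim(ref, cmp, W, H, W /*stride*/, 1 /*gaussian*/, 0);
    printf("SSIM = %f\n", score); /* close to, but below, 1.0 */
    return 0;
}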

20
3rdparty/libsquish/LICENSE vendored Normal file

@@ -0,0 +1,20 @@
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

35
3rdparty/libsquish/README vendored Normal file

@@ -0,0 +1,35 @@
LICENSE
-------
The squish library is distributed under the terms and conditions of the MIT
license. This license is specified at the top of each source file and must be
preserved in its entirety.
BUILDING AND INSTALLING THE LIBRARY
-----------------------------------
If you are using Visual Studio 2003 or above under Windows then load the Visual
Studio 2003 project in the vs7 folder. By default, the library is built using
SSE2 optimisations. To change this either change or remove the SQUISH_USE_SSE=2
from the preprocessor symbols.
If you are using a Mac then load the Xcode 2.2 project in the distribution. By
default, the library is built using Altivec optimisations. To change this,
either modify or remove SQUISH_USE_ALTIVEC=1 from the preprocessor symbols. I
guess I'll have to think about changing this for the new Intel Macs that are
rolling out...
If you are using Unix, first edit the config file in the base directory of
the distribution, enabling Altivec or SSE with the USE_ALTIVEC or USE_SSE
variables, and editing the optimisation flags passed to the C++ compiler if
necessary. Then make can be used to build the library, and make install (from
the superuser account) can be used to install (into /usr/local by default).
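As an illustrative example (assuming g++, and reusing the SQUISH_USE_SSE symbol
mentioned above), a direct build along the lines of
g++ -O2 -DSQUISH_USE_SSE=2 -c *.cpp
would compile the library objects with SSE2 enabled.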
REPORTING BUGS OR FEATURE REQUESTS
----------------------------------
Feedback can be sent to Simon Brown (the developer) at si@sjbrown.co.uk
New releases are announced on the squish library homepage at
http://sjbrown.co.uk/?code=squish

350
3rdparty/libsquish/alpha.cpp vendored Normal file

@@ -0,0 +1,350 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "alpha.h"
#include <climits>
#include <algorithm>
namespace squish {
static int FloatToInt( float a, int limit )
{
// use ANSI round-to-zero behaviour to get round-to-nearest
int i = ( int )( a + 0.5f );
// clamp to the limit
if( i < 0 )
i = 0;
else if( i > limit )
i = limit;
// done
return i;
}
void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
{
u8* bytes = reinterpret_cast< u8* >( block );
// quantise and pack the alpha values pairwise
for( int i = 0; i < 8; ++i )
{
// quantise down to 4 bits
float alpha1 = ( float )rgba[8*i + 3] * ( 15.0f/255.0f );
float alpha2 = ( float )rgba[8*i + 7] * ( 15.0f/255.0f );
int quant1 = FloatToInt( alpha1, 15 );
int quant2 = FloatToInt( alpha2, 15 );
// set alpha to zero where masked
int bit1 = 1 << ( 2*i );
int bit2 = 1 << ( 2*i + 1 );
if( ( mask & bit1 ) == 0 )
quant1 = 0;
if( ( mask & bit2 ) == 0 )
quant2 = 0;
// pack into the byte
bytes[i] = ( u8 )( quant1 | ( quant2 << 4 ) );
}
}
void DecompressAlphaDxt3( u8* rgba, void const* block )
{
u8 const* bytes = reinterpret_cast< u8 const* >( block );
// unpack the alpha values pairwise
for( int i = 0; i < 8; ++i )
{
// quantise down to 4 bits
u8 quant = bytes[i];
// unpack the values
u8 lo = quant & 0x0f;
u8 hi = quant & 0xf0;
// convert back up to bytes
rgba[8*i + 3] = lo | ( lo << 4 );
rgba[8*i + 7] = hi | ( hi >> 4 );
}
}
static void FixRange( int& min, int& max, int steps )
{
if( max - min < steps )
max = std::min( min + steps, 255 );
if( max - min < steps )
min = std::max( 0, max - steps );
}
static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
{
// fit each alpha value to the codebook
int err = 0;
for( int i = 0; i < 16; ++i )
{
// check this pixel is valid
int bit = 1 << i;
if( ( mask & bit ) == 0 )
{
// use the first code
indices[i] = 0;
continue;
}
// find the least error and corresponding index
int value = rgba[4*i + 3];
int least = INT_MAX;
int index = 0;
for( int j = 0; j < 8; ++j )
{
// get the squared error from this code
int dist = ( int )value - ( int )codes[j];
dist *= dist;
// compare with the best so far
if( dist < least )
{
least = dist;
index = j;
}
}
// save this index and accumulate the error
indices[i] = ( u8 )index;
err += least;
}
// return the total error
return err;
}
static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
{
u8* bytes = reinterpret_cast< u8* >( block );
// write the first two bytes
bytes[0] = ( u8 )alpha0;
bytes[1] = ( u8 )alpha1;
// pack the indices with 3 bits each
u8* dest = bytes + 2;
u8 const* src = indices;
for( int i = 0; i < 2; ++i )
{
// pack 8 3-bit values
int value = 0;
for( int j = 0; j < 8; ++j )
{
int index = *src++;
value |= ( index << 3*j );
}
// store in 3 bytes
for( int j = 0; j < 3; ++j )
{
int byte = ( value >> 8*j ) & 0xff;
*dest++ = ( u8 )byte;
}
}
}
static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 > alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else if( index <= 5 )
swapped[i] = 7 - index;
else
swapped[i] = index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
{
// check the relative values of the endpoints
if( alpha0 < alpha1 )
{
// swap the indices
u8 swapped[16];
for( int i = 0; i < 16; ++i )
{
u8 index = indices[i];
if( index == 0 )
swapped[i] = 1;
else if( index == 1 )
swapped[i] = 0;
else
swapped[i] = 9 - index;
}
// write the block
WriteAlphaBlock( alpha1, alpha0, swapped, block );
}
else
{
// write the block
WriteAlphaBlock( alpha0, alpha1, indices, block );
}
}
void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
{
// get the range for 5-alpha and 7-alpha interpolation
int min5 = 255;
int max5 = 0;
int min7 = 255;
int max7 = 0;
for( int i = 0; i < 16; ++i )
{
// check this pixel is valid
int bit = 1 << i;
if( ( mask & bit ) == 0 )
continue;
// incorporate into the min/max
int value = rgba[4*i + 3];
if( value < min7 )
min7 = value;
if( value > max7 )
max7 = value;
if( value != 0 && value < min5 )
min5 = value;
if( value != 255 && value > max5 )
max5 = value;
}
// handle the case that no valid range was found
if( min5 > max5 )
min5 = max5;
if( min7 > max7 )
min7 = max7;
// fix the range to be the minimum in each case
FixRange( min5, max5, 5 );
FixRange( min7, max7, 7 );
// set up the 5-alpha code book
u8 codes5[8];
codes5[0] = ( u8 )min5;
codes5[1] = ( u8 )max5;
for( int i = 1; i < 5; ++i )
codes5[1 + i] = ( u8 )( ( ( 5 - i )*min5 + i*max5 )/5 );
codes5[6] = 0;
codes5[7] = 255;
// set up the 7-alpha code book
u8 codes7[8];
codes7[0] = ( u8 )min7;
codes7[1] = ( u8 )max7;
for( int i = 1; i < 7; ++i )
codes7[1 + i] = ( u8 )( ( ( 7 - i )*min7 + i*max7 )/7 );
// fit the data to both code books
u8 indices5[16];
u8 indices7[16];
int err5 = FitCodes( rgba, mask, codes5, indices5 );
int err7 = FitCodes( rgba, mask, codes7, indices7 );
// save the block with least error
if( err5 <= err7 )
WriteAlphaBlock5( min5, max5, indices5, block );
else
WriteAlphaBlock7( min7, max7, indices7, block );
}
void DecompressAlphaDxt5( u8* rgba, void const* block )
{
// get the two alpha values
u8 const* bytes = reinterpret_cast< u8 const* >( block );
int alpha0 = bytes[0];
int alpha1 = bytes[1];
// compare the values to build the codebook
u8 codes[8];
codes[0] = ( u8 )alpha0;
codes[1] = ( u8 )alpha1;
if( alpha0 <= alpha1 )
{
// use 5-alpha codebook
for( int i = 1; i < 5; ++i )
codes[1 + i] = ( u8 )( ( ( 5 - i )*alpha0 + i*alpha1 )/5 );
codes[6] = 0;
codes[7] = 255;
}
else
{
// use 7-alpha codebook
for( int i = 1; i < 7; ++i )
codes[1 + i] = ( u8 )( ( ( 7 - i )*alpha0 + i*alpha1 )/7 );
}
// decode the indices
u8 indices[16];
u8 const* src = bytes + 2;
u8* dest = indices;
for( int i = 0; i < 2; ++i )
{
// grab 3 bytes
int value = 0;
for( int j = 0; j < 3; ++j )
{
int byte = *src++;
value |= ( byte << 8*j );
}
// unpack 8 3-bit values from it
for( int j = 0; j < 8; ++j )
{
int index = ( value >> 3*j ) & 0x7;
*dest++ = ( u8 )index;
}
}
// write out the indexed codebook values
for( int i = 0; i < 16; ++i )
rgba[4*i + 3] = codes[indices[i]];
}
} // namespace squish
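The two alpha encodings trade precision differently: DXT3 stores each alpha directly as 4 bits (expanded back by replicating the nibble, lo | (lo << 4)), while DXT5 stores two 8-bit endpoints plus 3-bit indices (packed 8 to every 3 bytes) into an interpolated codebook: five interpolants plus explicit 0 and 255 when alpha0 <= alpha1, seven interpolants otherwise. A standalone sketch of both codebooks (values are illustrative):

#include <cstdio>

typedef unsigned char u8;

int main()
{
    /* DXT3: 4-bit quantisation round trip; lo | (lo << 4) replicates bits. */
    u8 quant = 0x9;                           /* a 4-bit value */
    u8 expanded = (u8)(quant | (quant << 4)); /* 0x99 == 153 */
    printf("DXT3 4-bit %d -> 8-bit %d\n", quant, expanded);

    /* DXT5: build the 8-entry codebook, as DecompressAlphaDxt5 does. */
    int alpha0 = 32, alpha1 = 224;            /* alpha0 <= alpha1: 5-alpha mode */
    u8 codes[8];
    codes[0] = (u8)alpha0;
    codes[1] = (u8)alpha1;
    for (int i = 1; i < 5; ++i)
        codes[1 + i] = (u8)(((5 - i) * alpha0 + i * alpha1) / 5);
    codes[6] = 0;                             /* explicit transparent */
    codes[7] = 255;                           /* explicit opaque */
    for (int i = 0; i < 8; ++i)
        printf("code[%d] = %d\n", i, codes[i]); /* 32 224 70 108 147 185 0 255 */
    return 0;
}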

41
3rdparty/libsquish/alpha.h vendored Normal file

@@ -0,0 +1,41 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_ALPHA_H
#define SQUISH_ALPHA_H
#include "squish.h"
namespace squish {
void CompressAlphaDxt3( u8 const* rgba, int mask, void* block );
void CompressAlphaDxt5( u8 const* rgba, int mask, void* block );
void DecompressAlphaDxt3( u8* rgba, void const* block );
void DecompressAlphaDxt5( u8* rgba, void const* block );
} // namespace squish
#endif // ndef SQUISH_ALPHA_H

392
3rdparty/libsquish/clusterfit.cpp vendored Normal file

@@ -0,0 +1,392 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "clusterfit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
: ColourFit( colours, flags )
{
// set the iteration count
m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
if( metric )
m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
else
m_metric = VEC4_CONST( 1.0f );
// initialise the best error
m_besterror = VEC4_CONST( FLT_MAX );
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// get the covariance matrix
Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
// compute the principle component
m_principle = ComputePrincipleComponent( covariance );
}
bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// build the list of dot products
float dps[16];
u8* order = ( u8* )m_order + 16*iteration;
for( int i = 0; i < count; ++i )
{
dps[i] = Dot( values[i], axis );
order[i] = ( u8 )i;
}
// stable sort using them
for( int i = 0; i < count; ++i )
{
for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
{
std::swap( dps[j], dps[j - 1] );
std::swap( order[j], order[j - 1] );
}
}
// check this ordering is unique
for( int it = 0; it < iteration; ++it )
{
u8 const* prev = ( u8* )m_order + 16*it;
bool same = true;
for( int i = 0; i < count; ++i )
{
if( order[i] != prev[i] )
{
same = false;
break;
}
}
if( same )
return false;
}
// copy the ordering and weight all the points
Vec3 const* unweighted = m_colours->GetPoints();
float const* weights = m_colours->GetWeights();
m_xsum_wsum = VEC4_CONST( 0.0f );
for( int i = 0; i < count; ++i )
{
int j = order[i];
Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
Vec4 w( weights[j] );
Vec4 x = p*w;
m_points_weights[i] = x;
m_xsum_wsum += x;
}
return true;
}
void ClusterFit::Compress3( void* block )
{
// declare variables
int const count = m_colours->GetCount();
Vec4 const two = VEC4_CONST( 2.0 );
Vec4 const one = VEC4_CONST( 1.0f );
Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
Vec4 const zero = VEC4_CONST( 0.0f );
Vec4 const half = VEC4_CONST( 0.5f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// prepare an ordering using the principle axis
ConstructOrdering( m_principle, 0 );
// check all possible clusters and iterate on the total order
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = m_besterror;
u8 bestindices[16];
int bestiteration = 0;
int besti = 0, bestj = 0;
// loop over iterations (we avoid the case that all points in first or last cluster)
for( int iterationIndex = 0;; )
{
// first cluster [0,i) is at the start
Vec4 part0 = VEC4_CONST( 0.0f );
for( int i = 0; i < count; ++i )
{
// second cluster [i,j) is half along
Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
int jmin = ( i == 0 ) ? 1 : i;
for( int j = jmin;; )
{
// last cluster [j,count) is at the end
Vec4 part2 = m_xsum_wsum - part1 - part0;
// compute least squares terms directly
Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
Vec4 alpha2_sum = alphax_sum.SplatW();
Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
Vec4 beta2_sum = betax_sum.SplatW();
Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
// compute the least-squares optimal points
Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e5 = e4*m_metric;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
{
beststart = a;
bestend = b;
besti = i;
bestj = j;
besterror = error;
bestiteration = iterationIndex;
}
// advance
if( j == count )
break;
part1 += m_points_weights[j];
++j;
}
// advance
part0 += m_points_weights[i];
}
// stop if we didn't improve in this iteration
if( bestiteration != iterationIndex )
break;
// advance if possible
++iterationIndex;
if( iterationIndex == m_iterationCount )
break;
// stop if a new iteration is an ordering that has already been tried
Vec3 axis = ( bestend - beststart ).GetVec3();
if( !ConstructOrdering( axis, iterationIndex ) )
break;
}
// save the block if necessary
if( CompareAnyLessThan( besterror, m_besterror ) )
{
// remap the indices
u8 const* order = ( u8* )m_order + 16*bestiteration;
u8 unordered[16];
for( int m = 0; m < besti; ++m )
unordered[order[m]] = 0;
for( int m = besti; m < bestj; ++m )
unordered[order[m]] = 2;
for( int m = bestj; m < count; ++m )
unordered[order[m]] = 1;
m_colours->RemapIndices( unordered, bestindices );
// save the block
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;
}
}
void ClusterFit::Compress4( void* block )
{
// declare variables
int const count = m_colours->GetCount();
Vec4 const two = VEC4_CONST( 2.0f );
Vec4 const one = VEC4_CONST( 1.0f );
Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const zero = VEC4_CONST( 0.0f );
Vec4 const half = VEC4_CONST( 0.5f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// prepare an ordering using the principle axis
ConstructOrdering( m_principle, 0 );
// check all possible clusters and iterate on the total order
Vec4 beststart = VEC4_CONST( 0.0f );
Vec4 bestend = VEC4_CONST( 0.0f );
Vec4 besterror = m_besterror;
u8 bestindices[16];
int bestiteration = 0;
int besti = 0, bestj = 0, bestk = 0;
// loop over iterations (we avoid the case that all points in first or last cluster)
for( int iterationIndex = 0;; )
{
// first cluster [0,i) is at the start
Vec4 part0 = VEC4_CONST( 0.0f );
for( int i = 0; i < count; ++i )
{
// second cluster [i,j) is one third along
Vec4 part1 = VEC4_CONST( 0.0f );
for( int j = i;; )
{
// third cluster [j,k) is two thirds along
Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
int kmin = ( j == 0 ) ? 1 : j;
for( int k = kmin;; )
{
// last cluster [k,count) is at the end
Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
// compute least squares terms directly
Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
Vec4 const alpha2_sum = alphax_sum.SplatW();
Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
Vec4 const beta2_sum = betax_sum.SplatW();
Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
// compute the least-squares optimal points
Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e5 = e4*m_metric;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
{
beststart = a;
bestend = b;
besterror = error;
besti = i;
bestj = j;
bestk = k;
bestiteration = iterationIndex;
}
// advance
if( k == count )
break;
part2 += m_points_weights[k];
++k;
}
// advance
if( j == count )
break;
part1 += m_points_weights[j];
++j;
}
// advance
part0 += m_points_weights[i];
}
// stop if we didn't improve in this iteration
if( bestiteration != iterationIndex )
break;
// advance if possible
++iterationIndex;
if( iterationIndex == m_iterationCount )
break;
// stop if a new iteration is an ordering that has already been tried
Vec3 axis = ( bestend - beststart ).GetVec3();
if( !ConstructOrdering( axis, iterationIndex ) )
break;
}
// save the block if necessary
if( CompareAnyLessThan( besterror, m_besterror ) )
{
// remap the indices
u8 const* order = ( u8* )m_order + 16*bestiteration;
u8 unordered[16];
for( int m = 0; m < besti; ++m )
unordered[order[m]] = 0;
for( int m = besti; m < bestj; ++m )
unordered[order[m]] = 2;
for( int m = bestj; m < bestk; ++m )
unordered[order[m]] = 3;
for( int m = bestk; m < count; ++m )
unordered[order[m]] = 1;
m_colours->RemapIndices( unordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;
}
}
} // namespace squish
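ConstructOrdering is the core of the cluster fit: project every colour onto the principal axis, stable-sort by the projections, and let Compress3/Compress4 sweep cluster boundaries over that total order. A standalone sketch of the projection-and-sort step, using plain arrays instead of squish's Vec3 (points and axis are made up):

#include <cstdio>
#include <utility>

int main()
{
    const float axis[3] = { 1.0f, 0.0f, 0.0f }; /* stand-in principal axis */
    const float pts[4][3] = {
        { 0.9f, 0.1f, 0.1f }, { 0.1f, 0.2f, 0.3f },
        { 0.5f, 0.5f, 0.5f }, { 0.2f, 0.8f, 0.1f },
    };
    float dps[4];
    unsigned char order[4];
    for (int i = 0; i < 4; ++i) {
        dps[i] = pts[i][0]*axis[0] + pts[i][1]*axis[1] + pts[i][2]*axis[2];
        order[i] = (unsigned char)i;
    }
    /* Stable insertion sort by dot product, as in ConstructOrdering. */
    for (int i = 0; i < 4; ++i)
        for (int j = i; j > 0 && dps[j] < dps[j - 1]; --j) {
            std::swap(dps[j], dps[j - 1]);
            std::swap(order[j], order[j - 1]);
        }
    for (int i = 0; i < 4; ++i)
        printf("order[%d] = %d (dot %.2f)\n", i, order[i], dps[i]);
    return 0; /* order: 1, 3, 2, 0 */
}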

61
3rdparty/libsquish/clusterfit.h vendored Normal file

@@ -0,0 +1,61 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CLUSTERFIT_H
#define SQUISH_CLUSTERFIT_H
#include "squish.h"
#include "maths.h"
#include "simd.h"
#include "colourfit.h"
namespace squish {
class ClusterFit : public ColourFit
{
public:
ClusterFit( ColourSet const* colours, int flags, float* metric );
private:
bool ConstructOrdering( Vec3 const& axis, int iteration );
virtual void Compress3( void* block );
virtual void Compress4( void* block );
enum { kMaxIterations = 8 };
int m_iterationCount;
Vec3 m_principle;
u8 m_order[16*kMaxIterations];
Vec4 m_points_weights[16];
Vec4 m_xsum_wsum;
Vec4 m_metric;
Vec4 m_besterror;
};
} // namespace squish
#endif // ndef SQUISH_CLUSTERFIT_H

214
3rdparty/libsquish/colourblock.cpp vendored Normal file

@@ -0,0 +1,214 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourblock.h"
namespace squish {
static int FloatToInt( float a, int limit )
{
// use ANSI round-to-zero behaviour to get round-to-nearest
int i = ( int )( a + 0.5f );
// clamp to the limit
if( i < 0 )
i = 0;
else if( i > limit )
i = limit;
// done
return i;
}
static int FloatTo565( Vec3::Arg colour )
{
// get the components in the correct range
int r = FloatToInt( 31.0f*colour.X(), 31 );
int g = FloatToInt( 63.0f*colour.Y(), 63 );
int b = FloatToInt( 31.0f*colour.Z(), 31 );
// pack into a single value
return ( r << 11 ) | ( g << 5 ) | b;
}
static void WriteColourBlock( int a, int b, u8* indices, void* block )
{
// get the block as bytes
u8* bytes = ( u8* )block;
// write the endpoints
bytes[0] = ( u8 )( a & 0xff );
bytes[1] = ( u8 )( a >> 8 );
bytes[2] = ( u8 )( b & 0xff );
bytes[3] = ( u8 )( b >> 8 );
// write the indices
for( int i = 0; i < 4; ++i )
{
u8 const* ind = indices + 4*i;
bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
}
}
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
// get the packed values
int a = FloatTo565( start );
int b = FloatTo565( end );
// remap the indices
u8 remapped[16];
if( a <= b )
{
// use the indices directly
for( int i = 0; i < 16; ++i )
remapped[i] = indices[i];
}
else
{
// swap a and b
std::swap( a, b );
for( int i = 0; i < 16; ++i )
{
if( indices[i] == 0 )
remapped[i] = 1;
else if( indices[i] == 1 )
remapped[i] = 0;
else
remapped[i] = indices[i];
}
}
// write the block
WriteColourBlock( a, b, remapped, block );
}
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
{
// get the packed values
int a = FloatTo565( start );
int b = FloatTo565( end );
// remap the indices
u8 remapped[16];
if( a < b )
{
// swap a and b
std::swap( a, b );
for( int i = 0; i < 16; ++i )
remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
}
else if( a == b )
{
// use index 0
for( int i = 0; i < 16; ++i )
remapped[i] = 0;
}
else
{
// use the indices directly
for( int i = 0; i < 16; ++i )
remapped[i] = indices[i];
}
// write the block
WriteColourBlock( a, b, remapped, block );
}
static int Unpack565( u8 const* packed, u8* colour )
{
// build the packed value
int value = ( int )packed[0] | ( ( int )packed[1] << 8 );
// get the components in the stored range
u8 red = ( u8 )( ( value >> 11 ) & 0x1f );
u8 green = ( u8 )( ( value >> 5 ) & 0x3f );
u8 blue = ( u8 )( value & 0x1f );
// scale up to 8 bits
colour[0] = ( red << 3 ) | ( red >> 2 );
colour[1] = ( green << 2 ) | ( green >> 4 );
colour[2] = ( blue << 3 ) | ( blue >> 2 );
colour[3] = 255;
// return the value
return value;
}
void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
{
// get the block bytes
u8 const* bytes = reinterpret_cast< u8 const* >( block );
// unpack the endpoints
u8 codes[16];
int a = Unpack565( bytes, codes );
int b = Unpack565( bytes + 2, codes + 4 );
// generate the midpoints
for( int i = 0; i < 3; ++i )
{
int c = codes[i];
int d = codes[4 + i];
if( isDxt1 && a <= b )
{
codes[8 + i] = ( u8 )( ( c + d )/2 );
codes[12 + i] = 0;
}
else
{
codes[8 + i] = ( u8 )( ( 2*c + d )/3 );
codes[12 + i] = ( u8 )( ( c + 2*d )/3 );
}
}
// fill in alpha for the intermediate values
codes[8 + 3] = 255;
codes[12 + 3] = ( isDxt1 && a <= b ) ? 0 : 255;
// unpack the indices
u8 indices[16];
for( int i = 0; i < 4; ++i )
{
u8* ind = indices + 4*i;
u8 packed = bytes[4 + i];
ind[0] = packed & 0x3;
ind[1] = ( packed >> 2 ) & 0x3;
ind[2] = ( packed >> 4 ) & 0x3;
ind[3] = ( packed >> 6 ) & 0x3;
}
// store out the colours
for( int i = 0; i < 16; ++i )
{
u8 offset = 4*indices[i];
for( int j = 0; j < 4; ++j )
rgba[4*i + j] = codes[offset + j];
}
}
} // namespace squish
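FloatTo565 and Unpack565 round-trip through bit replication: the high bits of each 5- or 6-bit channel are copied into the freed low bits, so a full channel (0x1f or 0x3f) expands to exactly 255. A worked round trip for the colour (1.0, 0.5, 0.0):

#include <cstdio>

typedef unsigned char u8;

int main()
{
    /* Pack: r = round(31*1.0) = 31, g = round(63*0.5) = 32, b = 0. */
    int r = 31, g = 32, b = 0;
    int value = (r << 11) | (g << 5) | b;
    printf("packed = 0x%04x\n", value); /* 0xfc00 */

    /* Unpack with bit replication, as Unpack565 does. */
    u8 red   = (u8)((value >> 11) & 0x1f);
    u8 green = (u8)((value >> 5)  & 0x3f);
    u8 blue  = (u8)( value        & 0x1f);
    printf("rgb = %d %d %d\n",
           (red   << 3) | (red   >> 2),   /* 255 */
           (green << 2) | (green >> 4),   /* 130 */
           (blue  << 3) | (blue  >> 2));  /* 0   */
    return 0;
}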

41
3rdparty/libsquish/colourblock.h vendored Normal file

@@ -0,0 +1,41 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURBLOCK_H
#define SQUISH_COLOURBLOCK_H
#include "squish.h"
#include "maths.h"
namespace squish {
void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
} // namespace squish
#endif // ndef SQUISH_COLOURBLOCK_H

54
3rdparty/libsquish/colourfit.cpp vendored Normal file

@@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourfit.h"
#include "colourset.h"
namespace squish {
ColourFit::ColourFit( ColourSet const* colours, int flags )
: m_colours( colours ),
m_flags( flags )
{
}
ColourFit::~ColourFit()
{
}
void ColourFit::Compress( void* block )
{
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
if( isDxt1 )
{
Compress3( block );
if( !m_colours->IsTransparent() )
Compress4( block );
}
else
Compress4( block );
}
} // namespace squish

56
3rdparty/libsquish/colourfit.h vendored Normal file

@@ -0,0 +1,56 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURFIT_H
#define SQUISH_COLOURFIT_H
#include "squish.h"
#include "maths.h"
#include <climits>
namespace squish {
class ColourSet;
class ColourFit
{
public:
ColourFit( ColourSet const* colours, int flags );
virtual ~ColourFit();
void Compress( void* block );
protected:
virtual void Compress3( void* block ) = 0;
virtual void Compress4( void* block ) = 0;
ColourSet const* m_colours;
int m_flags;
};
} // namespace squish
#endif // ndef SQUISH_COLOURFIT_H

121
3rdparty/libsquish/colourset.cpp vendored Normal file

@@ -0,0 +1,121 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "colourset.h"
namespace squish {
ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
: m_count( 0 ),
m_transparent( false )
{
// check the compression mode for dxt1
bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
// create the minimal set
for( int i = 0; i < 16; ++i )
{
// check this pixel is enabled
int bit = 1 << i;
if( ( mask & bit ) == 0 )
{
m_remap[i] = -1;
continue;
}
// check for transparent pixels when using dxt1
if( isDxt1 && rgba[4*i + 3] < 128 )
{
m_remap[i] = -1;
m_transparent = true;
continue;
}
// loop over previous points for a match
for( int j = 0;; ++j )
{
// allocate a new point
if( j == i )
{
// normalise coordinates to [0,1]
float x = ( float )rgba[4*i] / 255.0f;
float y = ( float )rgba[4*i + 1] / 255.0f;
float z = ( float )rgba[4*i + 2] / 255.0f;
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// add the point
m_points[m_count] = Vec3( x, y, z );
m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
m_remap[i] = m_count;
// advance
++m_count;
break;
}
// check for a match
int oldbit = 1 << j;
bool match = ( ( mask & oldbit ) != 0 )
&& ( rgba[4*i] == rgba[4*j] )
&& ( rgba[4*i + 1] == rgba[4*j + 1] )
&& ( rgba[4*i + 2] == rgba[4*j + 2] )
&& ( rgba[4*j + 3] >= 128 || !isDxt1 );
if( match )
{
// get the index of the match
int index = m_remap[j];
// ensure there is always non-zero weight even for zero alpha
float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
// map to this point and increase the weight
m_weights[index] += ( weightByAlpha ? w : 1.0f );
m_remap[i] = index;
break;
}
}
}
// square root the weights
for( int i = 0; i < m_count; ++i )
m_weights[i] = std::sqrt( m_weights[i] );
}
void ColourSet::RemapIndices( u8 const* source, u8* target ) const
{
for( int i = 0; i < 16; ++i )
{
int j = m_remap[i];
if( j == -1 )
target[i] = 3;
else
target[i] = source[j];
}
}
} // namespace squish
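
A minimal sketch (not part of this commit, using plain ints as stand-ins for rgba values) of the deduplication idea in the ColourSet constructor above: identical enabled pixels share one palette entry, and the remap table maps each block position to that entry:

#include <cstdio>

int main()
{
    int pixels[4] = { 10, 20, 10, 30 }; // stand-ins for pixel colours
    int points[4], remap[4], count = 0;
    for( int i = 0; i < 4; ++i )
    {
        remap[i] = -1;
        for( int j = 0; j < count; ++j )
        {
            if( points[j] == pixels[i] )
                remap[i] = j; // match an existing point
        }
        if( remap[i] < 0 )
        {
            points[count] = pixels[i]; // allocate a new point
            remap[i] = count++;
        }
    }
    // prints count=3 remap=[0 1 0 2]
    std::printf( "count=%d remap=[%d %d %d %d]\n",
        count, remap[0], remap[1], remap[2], remap[3] );
    return 0;
}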

58
3rdparty/libsquish/colourset.h vendored Normal file

@@ -0,0 +1,58 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_COLOURSET_H
#define SQUISH_COLOURSET_H
#include "squish.h"
#include "maths.h"
namespace squish {
/*! @brief Represents a set of block colours
*/
class ColourSet
{
public:
ColourSet( u8 const* rgba, int mask, int flags );
int GetCount() const { return m_count; }
Vec3 const* GetPoints() const { return m_points; }
float const* GetWeights() const { return m_weights; }
bool IsTransparent() const { return m_transparent; }
void RemapIndices( u8 const* source, u8* target ) const;
private:
int m_count;
Vec3 m_points[16];
float m_weights[16];
int m_remap[16];
bool m_transparent;
};
} // namespace squish
#endif // ndef SQUISH_COLOURSET_H

49
3rdparty/libsquish/config.h vendored Normal file

@@ -0,0 +1,49 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_CONFIG_H
#define SQUISH_CONFIG_H
// Set to 1 when building squish to use Altivec instructions.
#ifndef SQUISH_USE_ALTIVEC
#define SQUISH_USE_ALTIVEC 0
#endif
// Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
#ifndef SQUISH_USE_SSE
#define SQUISH_USE_SSE 0
#endif
// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
#error "Cannot enable both Altivec and SSE!"
#endif
#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
#define SQUISH_USE_SIMD 1
#else
#define SQUISH_USE_SIMD 0
#endif
#endif // ndef SQUISH_CONFIG_H

259
3rdparty/libsquish/maths.cpp vendored Normal file

@@ -0,0 +1,259 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
/*! @file
The symmetric eigensystem solver algorithm is from
http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
*/
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
{
// compute the centroid
float total = 0.0f;
Vec3 centroid( 0.0f );
for( int i = 0; i < n; ++i )
{
total += weights[i];
centroid += weights[i]*points[i];
}
if( total > FLT_EPSILON )
centroid /= total;
// accumulate the covariance matrix
Sym3x3 covariance( 0.0f );
for( int i = 0; i < n; ++i )
{
Vec3 a = points[i] - centroid;
Vec3 b = weights[i]*a;
covariance[0] += a.X()*b.X();
covariance[1] += a.X()*b.Y();
covariance[2] += a.X()*b.Z();
covariance[3] += a.Y()*b.Y();
covariance[4] += a.Y()*b.Z();
covariance[5] += a.Z()*b.Z();
}
// return it
return covariance;
}
#if 0
static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
{
// compute M
Sym3x3 m;
m[0] = matrix[0] - evalue;
m[1] = matrix[1];
m[2] = matrix[2];
m[3] = matrix[3] - evalue;
m[4] = matrix[4];
m[5] = matrix[5] - evalue;
// compute U
Sym3x3 u;
u[0] = m[3]*m[5] - m[4]*m[4];
u[1] = m[2]*m[4] - m[1]*m[5];
u[2] = m[1]*m[4] - m[2]*m[3];
u[3] = m[0]*m[5] - m[2]*m[2];
u[4] = m[1]*m[2] - m[4]*m[0];
u[5] = m[0]*m[3] - m[1]*m[1];
// find the largest component
float mc = std::fabs( u[0] );
int mi = 0;
for( int i = 1; i < 6; ++i )
{
float c = std::fabs( u[i] );
if( c > mc )
{
mc = c;
mi = i;
}
}
// pick the column with this component
switch( mi )
{
case 0:
return Vec3( u[0], u[1], u[2] );
case 1:
case 3:
return Vec3( u[1], u[3], u[4] );
default:
return Vec3( u[2], u[4], u[5] );
}
}
static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
{
// compute M
Sym3x3 m;
m[0] = matrix[0] - evalue;
m[1] = matrix[1];
m[2] = matrix[2];
m[3] = matrix[3] - evalue;
m[4] = matrix[4];
m[5] = matrix[5] - evalue;
// find the largest component
float mc = std::fabs( m[0] );
int mi = 0;
for( int i = 1; i < 6; ++i )
{
float c = std::fabs( m[i] );
if( c > mc )
{
mc = c;
mi = i;
}
}
// pick the first eigenvector based on this index
switch( mi )
{
case 0:
case 1:
return Vec3( -m[1], m[0], 0.0f );
case 2:
return Vec3( m[2], 0.0f, -m[0] );
case 3:
case 4:
return Vec3( 0.0f, -m[4], m[3] );
default:
return Vec3( 0.0f, -m[5], m[4] );
}
}
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
// compute the cubic coefficients
float c0 = matrix[0]*matrix[3]*matrix[5]
+ 2.0f*matrix[1]*matrix[2]*matrix[4]
- matrix[0]*matrix[4]*matrix[4]
- matrix[3]*matrix[2]*matrix[2]
- matrix[5]*matrix[1]*matrix[1];
float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
float c2 = matrix[0] + matrix[3] + matrix[5];
// compute the quadratic coefficients
float a = c1 - ( 1.0f/3.0f )*c2*c2;
float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
// compute the root count check
float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
// test the multiplicity
if( FLT_EPSILON < Q )
{
// only one root, which implies we have a multiple of the identity
return Vec3( 1.0f );
}
else if( Q < -FLT_EPSILON )
{
// three distinct roots
float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
float rho = std::sqrt( 0.25f*b*b - Q );
float rt = std::pow( rho, 1.0f/3.0f );
float ct = std::cos( theta/3.0f );
float st = std::sin( theta/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + std::sqrt( 3.0f )*st );
float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - std::sqrt( 3.0f )*st );
// pick the larger
if( std::fabs( l2 ) > std::fabs( l1 ) )
l1 = l2;
if( std::fabs( l3 ) > std::fabs( l1 ) )
l1 = l3;
// get the eigenvector
return GetMultiplicity1Evector( matrix, l1 );
}
else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
{
// two roots
float rt;
if( b < 0.0f )
rt = -std::pow( -0.5f*b, 1.0f/3.0f );
else
rt = std::pow( 0.5f*b, 1.0f/3.0f );
float l1 = ( 1.0f/3.0f )*c2 + rt; // repeated
float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
// get the eigenvector
if( std::fabs( l1 ) > std::fabs( l2 ) )
return GetMultiplicity2Evector( matrix, l1 );
else
return GetMultiplicity1Evector( matrix, l2 );
}
}
#else
#define POWER_ITERATION_COUNT 8
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
Vec4 v = VEC4_CONST( 1.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
return v.GetVec3();
}
#endif
} // namespace squish
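
A minimal scalar sketch (not part of this commit) of the power iteration that ComputePrincipleComponent performs above: repeatedly multiply a start vector by the matrix and renormalise by the largest component, so the vector converges towards the dominant eigenvector. Like the Vec4 version, it assumes the dominant components stay positive, which holds for covariance matrices of colour data:

#include <algorithm>
#include <cstdio>

int main()
{
    // a symmetric 3x3 matrix in the same packed order as Sym3x3:
    // xx, xy, xz, yy, yz, zz
    float m[6] = { 4.0f, 1.0f, 0.0f, 3.0f, 1.0f, 2.0f };
    float v[3] = { 1.0f, 1.0f, 1.0f };
    for( int i = 0; i < 8; ++i ) // POWER_ITERATION_COUNT
    {
        // matrix multiply
        float w[3] =
        {
            m[0]*v[0] + m[1]*v[1] + m[2]*v[2],
            m[1]*v[0] + m[3]*v[1] + m[4]*v[2],
            m[2]*v[0] + m[4]*v[1] + m[5]*v[2]
        };
        // divide through by the largest component and advance
        float a = std::max( w[0], std::max( w[1], w[2] ) );
        for( int j = 0; j < 3; ++j )
            v[j] = w[j]/a;
    }
    std::printf( "principal axis ~ ( %f, %f, %f )\n", v[0], v[1], v[2] );
    return 0;
}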

233
3rdparty/libsquish/maths.h vendored Normal file

@@ -0,0 +1,233 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_MATHS_H
#define SQUISH_MATHS_H
#include <cmath>
#include <algorithm>
#include "config.h"
namespace squish {
class Vec3
{
public:
typedef Vec3 const& Arg;
Vec3()
{
}
explicit Vec3( float s )
{
m_x = s;
m_y = s;
m_z = s;
}
Vec3( float x, float y, float z )
{
m_x = x;
m_y = y;
m_z = z;
}
float X() const { return m_x; }
float Y() const { return m_y; }
float Z() const { return m_z; }
Vec3 operator-() const
{
return Vec3( -m_x, -m_y, -m_z );
}
Vec3& operator+=( Arg v )
{
m_x += v.m_x;
m_y += v.m_y;
m_z += v.m_z;
return *this;
}
Vec3& operator-=( Arg v )
{
m_x -= v.m_x;
m_y -= v.m_y;
m_z -= v.m_z;
return *this;
}
Vec3& operator*=( Arg v )
{
m_x *= v.m_x;
m_y *= v.m_y;
m_z *= v.m_z;
return *this;
}
Vec3& operator*=( float s )
{
m_x *= s;
m_y *= s;
m_z *= s;
return *this;
}
Vec3& operator/=( Arg v )
{
m_x /= v.m_x;
m_y /= v.m_y;
m_z /= v.m_z;
return *this;
}
Vec3& operator/=( float s )
{
float t = 1.0f/s;
m_x *= t;
m_y *= t;
m_z *= t;
return *this;
}
friend Vec3 operator+( Arg left, Arg right )
{
Vec3 copy( left );
return copy += right;
}
friend Vec3 operator-( Arg left, Arg right )
{
Vec3 copy( left );
return copy -= right;
}
friend Vec3 operator*( Arg left, Arg right )
{
Vec3 copy( left );
return copy *= right;
}
friend Vec3 operator*( Arg left, float right )
{
Vec3 copy( left );
return copy *= right;
}
friend Vec3 operator*( float left, Arg right )
{
Vec3 copy( right );
return copy *= left;
}
friend Vec3 operator/( Arg left, Arg right )
{
Vec3 copy( left );
return copy /= right;
}
friend Vec3 operator/( Arg left, float right )
{
Vec3 copy( left );
return copy /= right;
}
friend float Dot( Arg left, Arg right )
{
return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
}
friend Vec3 Min( Arg left, Arg right )
{
return Vec3(
std::min( left.m_x, right.m_x ),
std::min( left.m_y, right.m_y ),
std::min( left.m_z, right.m_z )
);
}
friend Vec3 Max( Arg left, Arg right )
{
return Vec3(
std::max( left.m_x, right.m_x ),
std::max( left.m_y, right.m_y ),
std::max( left.m_z, right.m_z )
);
}
friend Vec3 Truncate( Arg v )
{
return Vec3(
v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ),
v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ),
v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z )
);
}
private:
float m_x;
float m_y;
float m_z;
};
inline float LengthSquared( Vec3::Arg v )
{
return Dot( v, v );
}
class Sym3x3
{
public:
Sym3x3()
{
}
Sym3x3( float s )
{
for( int i = 0; i < 6; ++i )
m_x[i] = s;
}
float operator[]( int index ) const
{
return m_x[index];
}
float& operator[]( int index )
{
return m_x[index];
}
private:
float m_x[6];
};
Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
} // namespace squish
#endif // ndef SQUISH_MATHS_H

201
3rdparty/libsquish/rangefit.cpp vendored Normal file

@@ -0,0 +1,201 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "rangefit.h"
#include "colourset.h"
#include "colourblock.h"
#include <cfloat>
namespace squish {
RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric )
: ColourFit( colours, flags )
{
// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
if( metric )
m_metric = Vec3( metric[0], metric[1], metric[2] );
else
m_metric = Vec3( 1.0f );
// initialise the best error
m_besterror = FLT_MAX;
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
float const* weights = m_colours->GetWeights();
// get the covariance matrix
Sym3x3 covariance = ComputeWeightedCovariance( count, values, weights );
// compute the principle component
Vec3 principle = ComputePrincipleComponent( covariance );
// get the min and max range as the codebook endpoints
Vec3 start( 0.0f );
Vec3 end( 0.0f );
if( count > 0 )
{
float min, max;
// compute the range
start = end = values[0];
min = max = Dot( values[0], principle );
for( int i = 1; i < count; ++i )
{
float val = Dot( values[i], principle );
if( val < min )
{
start = values[i];
min = val;
}
else if( val > max )
{
end = values[i];
max = val;
}
}
}
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
start = Min( one, Max( zero, start ) );
end = Min( one, Max( zero, end ) );
// clamp to the grid and save
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
Vec3 const half( 0.5f );
m_start = Truncate( grid*start + half )*gridrcp;
m_end = Truncate( grid*end + half )*gridrcp;
}
void RangeFit::Compress3( void* block )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// create a codebook
Vec3 codes[3];
codes[0] = m_start;
codes[1] = m_end;
codes[2] = 0.5f*m_start + 0.5f*m_end;
// match each point to the closest code
u8 closest[16];
float error = 0.0f;
for( int i = 0; i < count; ++i )
{
// find the closest code
float dist = FLT_MAX;
int idx = 0;
for( int j = 0; j < 3; ++j )
{
float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
if( d < dist )
{
dist = d;
idx = j;
}
}
// save the index
closest[i] = ( u8 )idx;
// accumulate the error
error += dist;
}
// save this scheme if it wins
if( error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( closest, indices );
// save the block
WriteColourBlock3( m_start, m_end, indices, block );
// save the error
m_besterror = error;
}
}
void RangeFit::Compress4( void* block )
{
// cache some values
int const count = m_colours->GetCount();
Vec3 const* values = m_colours->GetPoints();
// create a codebook
Vec3 codes[4];
codes[0] = m_start;
codes[1] = m_end;
codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
// match each point to the closest code
u8 closest[16];
float error = 0.0f;
for( int i = 0; i < count; ++i )
{
// find the closest code
float dist = FLT_MAX;
int idx = 0;
for( int j = 0; j < 4; ++j )
{
float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
if( d < dist )
{
dist = d;
idx = j;
}
}
// save the index
closest[i] = ( u8 )idx;
// accumulate the error
error += dist;
}
// save this scheme if it wins
if( error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( closest, indices );
// save the block
WriteColourBlock4( m_start, m_end, indices, block );
// save the error
m_besterror = error;
}
}
} // namespace squish
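
A quick numeric sketch (not part of this commit) of the endpoint quantisation at the end of the RangeFit constructor: each channel is snapped to its 5- or 6-bit grid by scaling, rounding, and scaling back. Truncate rounds towards zero, and since grid*start + 0.5f is non-negative here, std::floor gives the same result:

#include <cmath>
#include <cstdio>

int main()
{
    float start = 0.5f;  // a normalised red value in [0,1]
    float grid = 31.0f;  // 5-bit red/blue grid; green uses 63
    float snapped = std::floor( grid*start + 0.5f )/grid;
    std::printf( "%f snaps to %f (= 16/31)\n", start, snapped );
    return 0;
}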

54
3rdparty/libsquish/rangefit.h vendored Normal file

@@ -0,0 +1,54 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_RANGEFIT_H
#define SQUISH_RANGEFIT_H
#include "squish.h"
#include "colourfit.h"
#include "maths.h"
namespace squish {
class ColourSet;
class RangeFit : public ColourFit
{
public:
RangeFit( ColourSet const* colours, int flags, float* metric );
private:
virtual void Compress3( void* block );
virtual void Compress4( void* block );
Vec3 m_metric;
Vec3 m_start;
Vec3 m_end;
float m_besterror;
};
} // namespace squish
#endif // ndef SQUISH_RANGEFIT_H

32
3rdparty/libsquish/simd.h vendored Normal file

@@ -0,0 +1,32 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_H
#define SQUISH_SIMD_H
#include "maths.h"
#include "simd_float.h"
#endif // ndef SQUISH_SIMD_H

183
3rdparty/libsquish/simd_float.h vendored Normal file

@@ -0,0 +1,183 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SIMD_FLOAT_H
#define SQUISH_SIMD_FLOAT_H
#include <algorithm>
namespace squish {
#define VEC4_CONST( X ) Vec4( X )
class Vec4
{
public:
typedef Vec4 const& Arg;
Vec4() {}
explicit Vec4( float s )
: m_x( s ),
m_y( s ),
m_z( s ),
m_w( s )
{
}
Vec4( float x, float y, float z, float w )
: m_x( x ),
m_y( y ),
m_z( z ),
m_w( w )
{
}
Vec3 GetVec3() const
{
return Vec3( m_x, m_y, m_z );
}
Vec4 SplatX() const { return Vec4( m_x ); }
Vec4 SplatY() const { return Vec4( m_y ); }
Vec4 SplatZ() const { return Vec4( m_z ); }
Vec4 SplatW() const { return Vec4( m_w ); }
Vec4& operator+=( Arg v )
{
m_x += v.m_x;
m_y += v.m_y;
m_z += v.m_z;
m_w += v.m_w;
return *this;
}
Vec4& operator-=( Arg v )
{
m_x -= v.m_x;
m_y -= v.m_y;
m_z -= v.m_z;
m_w -= v.m_w;
return *this;
}
Vec4& operator*=( Arg v )
{
m_x *= v.m_x;
m_y *= v.m_y;
m_z *= v.m_z;
m_w *= v.m_w;
return *this;
}
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
{
Vec4 copy( left );
return copy += right;
}
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
{
Vec4 copy( left );
return copy -= right;
}
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
{
Vec4 copy( left );
return copy *= right;
}
//! Returns a*b + c
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return a*b + c;
}
//! Returns -( a*b - c )
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
{
return c - a*b;
}
friend Vec4 Reciprocal( Vec4::Arg v )
{
return Vec4(
1.0f/v.m_x,
1.0f/v.m_y,
1.0f/v.m_z,
1.0f/v.m_w
);
}
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
{
return Vec4(
std::min( left.m_x, right.m_x ),
std::min( left.m_y, right.m_y ),
std::min( left.m_z, right.m_z ),
std::min( left.m_w, right.m_w )
);
}
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
{
return Vec4(
std::max( left.m_x, right.m_x ),
std::max( left.m_y, right.m_y ),
std::max( left.m_z, right.m_z ),
std::max( left.m_w, right.m_w )
);
}
friend Vec4 Truncate( Vec4::Arg v )
{
return Vec4(
v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ),
v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ),
v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z ),
v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w )
);
}
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
{
return left.m_x < right.m_x
|| left.m_y < right.m_y
|| left.m_z < right.m_z
|| left.m_w < right.m_w;
}
private:
float m_x;
float m_y;
float m_z;
float m_w;
};
} // namespace squish
#endif // ndef SQUISH_SIMD_FLOAT_H

172
3rdparty/libsquish/singlecolourfit.cpp vendored Normal file

@@ -0,0 +1,172 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "singlecolourfit.h"
#include "colourset.h"
#include "colourblock.h"
namespace squish {
struct SourceBlock
{
u8 start;
u8 end;
u8 error;
};
struct SingleColourLookup
{
SourceBlock sources[2];
};
#include "singlecolourlookup.inl"
static int FloatToInt( float a, int limit )
{
// use ANSI round-to-zero behaviour to get round-to-nearest
int i = ( int )( a + 0.5f );
// clamp to the limit
if( i < 0 )
i = 0;
else if( i > limit )
i = limit;
// done
return i;
}
SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
: ColourFit( colours, flags )
{
// grab the single colour
Vec3 const* values = m_colours->GetPoints();
m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 );
m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 );
m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 );
// initialise the best error
m_besterror = INT_MAX;
}
void SingleColourFit::Compress3( void* block )
{
// build the table of lookups
SingleColourLookup const* const lookups[] =
{
lookup_5_3,
lookup_6_3,
lookup_5_3
};
// find the best end-points and index
ComputeEndPoints( lookups );
// build the block if we win
if( m_error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( &m_index, indices );
// save the block
WriteColourBlock3( m_start, m_end, indices, block );
// save the error
m_besterror = m_error;
}
}
void SingleColourFit::Compress4( void* block )
{
// build the table of lookups
SingleColourLookup const* const lookups[] =
{
lookup_5_4,
lookup_6_4,
lookup_5_4
};
// find the best end-points and index
ComputeEndPoints( lookups );
// build the block if we win
if( m_error < m_besterror )
{
// remap the indices
u8 indices[16];
m_colours->RemapIndices( &m_index, indices );
// save the block
WriteColourBlock4( m_start, m_end, indices, block );
// save the error
m_besterror = m_error;
}
}
void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
{
// check each index combination (endpoint or intermediate)
m_error = INT_MAX;
for( int index = 0; index < 2; ++index )
{
// check the error for this codebook index
SourceBlock const* sources[3];
int error = 0;
for( int channel = 0; channel < 3; ++channel )
{
// grab the lookup table and index for this channel
SingleColourLookup const* lookup = lookups[channel];
int target = m_colour[channel];
// store a pointer to the source for this channel
sources[channel] = lookup[target].sources + index;
// accumulate the error
int diff = sources[channel]->error;
error += diff*diff;
}
// keep it if the error is lower
if( error < m_error )
{
m_start = Vec3(
( float )sources[0]->start/31.0f,
( float )sources[1]->start/63.0f,
( float )sources[2]->start/31.0f
);
m_end = Vec3(
( float )sources[0]->end/31.0f,
( float )sources[1]->end/63.0f,
( float )sources[2]->end/31.0f
);
m_index = ( u8 )( 2*index );
m_error = error;
}
}
}
} // namespace squish

58
3rdparty/libsquish/singlecolourfit.h vendored Normal file

@@ -0,0 +1,58 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_SINGLECOLOURFIT_H
#define SQUISH_SINGLECOLOURFIT_H
#include "squish.h"
#include "colourfit.h"
namespace squish {
class ColourSet;
struct SingleColourLookup;
class SingleColourFit : public ColourFit
{
public:
SingleColourFit( ColourSet const* colours, int flags );
private:
virtual void Compress3( void* block );
virtual void Compress4( void* block );
void ComputeEndPoints( SingleColourLookup const* const* lookups );
u8 m_colour[3];
Vec3 m_start;
Vec3 m_end;
u8 m_index;
int m_error;
int m_besterror;
};
} // namespace squish
#endif // ndef SQUISH_SINGLECOLOURFIT_H

1064
3rdparty/libsquish/singlecolourlookup.inl vendored Normal file

File diff suppressed because it is too large

260
3rdparty/libsquish/squish.cpp vendored Normal file

@@ -0,0 +1,260 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include "squish.h"
#include "colourset.h"
#include "maths.h"
#include "rangefit.h"
#include "clusterfit.h"
#include "colourblock.h"
#include "alpha.h"
#include "singlecolourfit.h"
namespace squish {
static int FixFlags( int flags )
{
// grab the flag bits
int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
int extra = flags & kWeightColourByAlpha;
// set defaults
if ( method != kDxt3
&& method != kDxt5
&& method != kBc4
&& method != kBc5 )
{
method = kDxt1;
}
if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
fit = kColourClusterFit;
// done
return method | fit | extra;
}
void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
{
// fix any bad flags
flags = FixFlags( flags );
if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
{
u8 alpha[16*4];
for( int i = 0; i < 16; ++i )
{
alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
}
u8* rBlock = reinterpret_cast< u8* >( block );
CompressAlphaDxt5( alpha, mask, rBlock );
if ( ( flags & ( kBc5 ) ) != 0 )
{
for( int i = 0; i < 16; ++i )
{
alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
}
u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
CompressAlphaDxt5( alpha, mask, gBlock );
}
return;
}
// get the block locations
void* colourBlock = block;
void* alphaBlock = block;
if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
colourBlock = reinterpret_cast< u8* >( block ) + 8;
// create the minimal point set
ColourSet colours( rgba, mask, flags );
// check the compression type and compress colour
if( colours.GetCount() == 1 )
{
// always do a single colour fit
SingleColourFit fit( &colours, flags );
fit.Compress( colourBlock );
}
else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
{
// do a range fit
RangeFit fit( &colours, flags, metric );
fit.Compress( colourBlock );
}
else
{
// default to a cluster fit (could be iterative or not)
ClusterFit fit( &colours, flags, metric );
fit.Compress( colourBlock );
}
// compress alpha separately if necessary
if( ( flags & kDxt3 ) != 0 )
CompressAlphaDxt3( rgba, mask, alphaBlock );
else if( ( flags & kDxt5 ) != 0 )
CompressAlphaDxt5( rgba, mask, alphaBlock );
}
void Decompress( u8* rgba, void const* block, int flags )
{
// fix any bad flags
flags = FixFlags( flags );
// get the block locations
void const* colourBlock = block;
void const* alphaBlock = block;
if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
// decompress colour
DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
// decompress alpha separately if necessary
if( ( flags & kDxt3 ) != 0 )
DecompressAlphaDxt3( rgba, alphaBlock );
else if( ( flags & kDxt5 ) != 0 )
DecompressAlphaDxt5( rgba, alphaBlock );
}
int GetStorageRequirements( int width, int height, int flags )
{
// fix any bad flags
flags = FixFlags( flags );
// compute the storage requirements
int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
return blockcount*blocksize;
}
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
{
// fix any bad flags
flags = FixFlags( flags );
// initialise the block output
u8* targetBlock = reinterpret_cast< u8* >( blocks );
int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
// loop over blocks
for( int y = 0; y < height; y += 4 )
{
for( int x = 0; x < width; x += 4 )
{
// build the 4x4 block of pixels
u8 sourceRgba[16*4];
u8* targetPixel = sourceRgba;
int mask = 0;
for( int py = 0; py < 4; ++py )
{
for( int px = 0; px < 4; ++px )
{
// get the source pixel in the image
int sx = x + px;
int sy = y + py;
// enable if we're in the image
if( sx < width && sy < height )
{
// copy the rgba value
u8 const* sourcePixel = rgba + 4*( width*sy + sx );
for( int i = 0; i < 4; ++i )
*targetPixel++ = *sourcePixel++;
// enable this pixel
mask |= ( 1 << ( 4*py + px ) );
}
else
{
// skip this pixel as it's outside the image
targetPixel += 4;
}
}
}
// compress it into the output
CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
// advance
targetBlock += bytesPerBlock;
}
}
}
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
{
// fix any bad flags
flags = FixFlags( flags );
// initialise the block input
u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
// loop over blocks
for( int y = 0; y < height; y += 4 )
{
for( int x = 0; x < width; x += 4 )
{
// decompress the block
u8 targetRgba[4*16];
Decompress( targetRgba, sourceBlock, flags );
// write the decompressed pixels to the correct image locations
u8 const* sourcePixel = targetRgba;
for( int py = 0; py < 4; ++py )
{
for( int px = 0; px < 4; ++px )
{
// get the target location
int sx = x + px;
int sy = y + py;
if( sx < width && sy < height )
{
u8* targetPixel = rgba + 4*( width*sy + sx );
// copy the rgba value
for( int i = 0; i < 4; ++i )
*targetPixel++ = *sourcePixel++;
}
else
{
// skip this pixel as it's outside the image
sourcePixel += 4;
}
}
}
// advance
sourceBlock += bytesPerBlock;
}
}
}
} // namespace squish
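
A minimal sketch (not part of this commit) of how CompressImage above builds the pixel-enable mask for a block that hangs off the image edge; for a block whose valid region is 2 pixels wide and 3 high, only those 6 bits end up set:

#include <cstdio>

int main()
{
    int validW = 2, validH = 3; // pixels of the block inside the image
    int mask = 0;
    for( int py = 0; py < 4; ++py )
    {
        for( int px = 0; px < 4; ++px )
        {
            if( px < validW && py < validH )
                mask |= 1 << ( 4*py + px );
        }
    }
    std::printf( "mask = 0x%04x\n", mask ); // prints mask = 0x0333
    return 0;
}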

269
3rdparty/libsquish/squish.h vendored Normal file

@@ -0,0 +1,269 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#ifndef SQUISH_H
#define SQUISH_H
//! All squish API functions live in this namespace.
namespace squish {
// -----------------------------------------------------------------------------
//! Typedef a quantity that is a single unsigned byte.
typedef unsigned char u8;
// -----------------------------------------------------------------------------
enum
{
//! Use DXT1 compression.
kDxt1 = ( 1 << 0 ),
//! Use DXT3 compression.
kDxt3 = ( 1 << 1 ),
//! Use DXT5 compression.
kDxt5 = ( 1 << 2 ),
//! Use BC4 compression.
kBc4 = ( 1 << 3 ),
//! Use BC5 compression.
kBc5 = ( 1 << 4 ),
//! Use a slow but high quality colour compressor (the default).
kColourClusterFit = ( 1 << 5 ),
//! Use a fast but low quality colour compressor.
kColourRangeFit = ( 1 << 6 ),
//! Weight the colour by alpha during cluster fit (disabled by default).
kWeightColourByAlpha = ( 1 << 7 ),
//! Use a very slow but very high quality colour compressor.
kColourIterativeClusterFit = ( 1 << 8 ),
};
// -----------------------------------------------------------------------------
/*! @brief Compresses a 4x4 block of pixels.
@param rgba The rgba values of the 16 source pixels.
@param mask The valid pixel mask.
@param block Storage for the compressed DXT block.
@param flags Compression flags.
@param metric An optional perceptual metric.
The source pixels should be presented as a contiguous array of 16 rgba
values, one byte per component. In memory this should be:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The mask parameter enables only certain pixels within the block. The lowest
bit enables the first pixel and so on up to the 16th bit. Bits beyond the
16th bit are ignored. Pixels that are not enabled are allowed to take
arbitrary colours in the output block. An example of how this can be used
is in the CompressImage function to disable pixels outside the bounds of
the image when the width or height is not divisible by 4.
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for the compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor to use
when fitting the RGB components of the data. Possible colour compressors
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
quality) or kColourIterativeClusterFit (slowest, best quality).
When using kColourClusterFit or kColourIterativeClusterFit, an additional
flag can be specified to weight the importance of each pixel by its alpha
value. For images that are rendered using alpha blending, this can
significantly increase the perceived quality.
The metric parameter can be used to weight the relative importance of each
colour channel, or pass NULL to use the default uniform weight of
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
allowed either uniform or "perceptual" weights with the fixed values
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
contiguous array of 3 floats.
*/
void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
// -----------------------------------------------------------------------------
/*! @brief Compresses a 4x4 block of pixels.
@param rgba The rgba values of the 16 source pixels.
@param block Storage for the compressed DXT block.
@param flags Compression flags.
@param metric An optional perceptual metric.
The source pixels should be presented as a contiguous array of 16 rgba
values, one byte per component. In memory this should be:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for the compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor to use
when fitting the RGB components of the data. Possible colour compressors
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
quality) or kColourIterativeClusterFit (slowest, best quality).
When using kColourClusterFit or kColourIterativeClusterFit, an additional
flag can be specified to weight the importance of each pixel by its alpha
value. For images that are rendered using alpha blending, this can
significantly increase the perceived quality.
The metric parameter can be used to weight the relative importance of each
colour channel, or pass NULL to use the default uniform weight of
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
allowed either uniform or "perceptual" weights with the fixed values
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
contiguous array of 3 floats.
This method is an inline that calls CompressMasked with a mask of 0xffff,
provided for compatibility with older versions of squish.
*/
inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
{
CompressMasked( rgba, 0xffff, block, flags, metric );
}
// -----------------------------------------------------------------------------
/*! @brief Decompresses a 4x4 block of pixels.
@param rgba Storage for the 16 decompressed pixels.
@param block The compressed DXT block.
@param flags Compression flags.
The decompressed pixels will be written as a contiguous array of 16 rgba
values, one byte per component. In memory this is:
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
*/
void Decompress( u8* rgba, void const* block, int flags );
// -----------------------------------------------------------------------------
/*! @brief Computes the amount of compressed storage required.
@param width The width of the image.
@param height The height of the image.
@param flags Compression flags.
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Most DXT images will be a multiple of 4 in each dimension, but this
function supports arbitrary size images by allowing the outer blocks to
be only partially used.
*/
int GetStorageRequirements( int width, int height, int flags );
// -----------------------------------------------------------------------------
/*! @brief Compresses an image in memory.
@param rgba The pixels of the source.
@param width The width of the source image.
@param height The height of the source image.
@param blocks Storage for the compressed output.
@param flags Compression flags.
@param metric An optional perceptual metric.
The source pixels should be presented as a contiguous array of width*height
rgba values, one byte per component. In memory this should be:
{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. When using DXT1
compression, 8 bytes of storage are required for each compressed DXT block.
DXT3 and DXT5 compression require 16 bytes of storage per block.
The flags parameter can also specify a preferred colour compressor to use
when fitting the RGB components of the data. Possible colour compressors
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
quality) or kColourIterativeClusterFit (slowest, best quality).
When using kColourClusterFit or kColourIterativeClusterFit, an additional
flag can be specified to weight the importance of each pixel by its alpha
value. For images that are rendered using alpha blending, this can
significantly increase the perceived quality.
The metric parameter can be used to weight the relative importance of each
colour channel, or pass NULL to use the default uniform weight of
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
allowed either uniform or "perceptual" weights with the fixed values
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
contiguous array of 3 floats.
Internally this function calls squish::CompressMasked for each block, which
allows for pixels outside the image to take arbitrary values. The function
squish::GetStorageRequirements can be called to compute the amount of memory
to allocate for the compressed output.
*/
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
// -----------------------------------------------------------------------------
/*! @brief Decompresses an image in memory.
@param rgba Storage for the decompressed pixels.
@param width The width of the source image.
@param height The height of the source image.
@param blocks The compressed DXT blocks.
@param flags Compression flags.
The decompressed pixels will be written as a contiguous array of width*height
rgba values, one byte per component. In memory this is:
{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
however, DXT1 will be used by default if none is specified. All other flags
are ignored.
Internally this function calls squish::Decompress for each block.
*/
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
// -----------------------------------------------------------------------------
} // namespace squish
#endif // ndef SQUISH_H
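The three entry points above are typically used together; here is a minimal round-trip sketch (an illustration, not part of this commit, assuming only the declarations in this header and its kDxt1/kDxt3/kDxt5 flags):

#include <vector>
#include "squish.h"

// Round-trip an RGBA image through DXT5: size the block buffer with
// GetStorageRequirements, compress, then decompress into a fresh buffer.
void roundTrip( squish::u8 const* rgba, int width, int height )
{
    int bytes = squish::GetStorageRequirements( width, height, squish::kDxt5 );
    std::vector<unsigned char> blocks( bytes );
    squish::CompressImage( rgba, width, height, blocks.data(), squish::kDxt5 );

    std::vector<squish::u8> pixels( size_t( width ) * height * 4 );
    squish::DecompressImage( pixels.data(), width, height, blocks.data(), squish::kDxt5 );
}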

10
3rdparty/lodepng/README.md vendored Normal file
View File

@@ -0,0 +1,10 @@
LodePNG
-------
PNG encoder and decoder in C and C++.
Home page: http://lodev.org/lodepng/
Only two files are needed to allow your program to read and write PNG files: lodepng.cpp and lodepng.h.
The other files in the project are just examples, unit tests, etc...

6224
3rdparty/lodepng/lodepng.cpp vendored Normal file

File diff suppressed because it is too large

1759
3rdparty/lodepng/lodepng.h vendored Normal file

File diff suppressed because it is too large

22
3rdparty/maratis-tcl/LICENSE vendored Normal file
View File

@@ -0,0 +1,22 @@
Maratis Tiny C library
Copyright (c) 2015 Anael Seghezzi <www.maratis3d.com>
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would
be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not
be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.

2340
3rdparty/maratis-tcl/m_image.h vendored Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,24 @@
NVIDIA Texture Tools 2.0 is licensed under the MIT license.
Copyright (c) 2007 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

75
3rdparty/nvtt/bc6h/bits.h vendored Normal file
View File

@@ -0,0 +1,75 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _ZOH_BITS_H
#define _ZOH_BITS_H
// read/write a bitstream
#include "nvcore/debug.h"
namespace ZOH {
class Bits
{
public:
Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
void write(int value, int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
for (int i=0; i<nbits; ++i)
writeone(value>>i);
}
int read(int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
int out = 0;
for (int i=0; i<nbits; ++i)
out |= readone() << i;
return out;
}
int getptr() { return bptr; }
void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; }
private:
int bptr; // next bit to read
int bend; // last written bit + 1
char *bits; // ptr to user bit stream
const char *cbits; // ptr to const user bit stream
int maxbits; // max size of user bit stream
char readonly; // 1 if this is a read-only stream
int readone() {
nvAssert (bptr < bend);
if (bptr >= bend) return 0;
int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
++bptr;
return bit != 0;
}
void writeone(int bit) {
nvAssert (!readonly); // "Writing a read-only bit stream"
nvAssert (bptr < maxbits);
if (bptr >= maxbits) return;
if (bit&1)
bits[bptr>>3] |= 1 << (bptr & 7);
else
bits[bptr>>3] &= ~(1 << (bptr & 7));
if (bptr++ >= bend) bend = bptr;
}
};
}
#endif
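A short sketch of how this stream is driven (an illustration, not part of this commit; 128 is the BC6H block size in bits). Values are written and read least-significant bit first, so reading back with the same field widths recovers them in order:

char block[16] = {};                 // one 128-bit BC6H block
ZOH::Bits out( block, 128 );
out.write( 0x03, 5 );                // 5-bit mode field
out.write( 11, 4 );                  // 4-bit index field

ZOH::Bits in( (const char*)block, 128 );
int mode  = in.read( 5 );            // == 0x03
int index = in.read( 4 );            // == 11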

133
3rdparty/nvtt/bc6h/shapes_two.h vendored Normal file
View File

@@ -0,0 +1,133 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#pragma once
#ifndef _ZOH_SHAPES_TWO_H
#define _ZOH_SHAPES_TWO_H
// shapes for two regions
#define NREGIONS 2
#define NSHAPES 64
#define SHAPEBITS 6
static const int shapes[NSHAPES*16] =
{
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,
1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0,
1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
};
#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
static const int shapeindex_to_compressed_indices[NSHAPES*2] =
{
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 8, 0, 2,
0, 2, 0, 8, 0, 8, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 8, 0, 8, 0, 2, 0, 2,
0,15, 0,15, 0, 6, 0, 8,
0, 2, 0, 8, 0,15, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 2, 0,15, 0,15, 0, 6,
0, 6, 0, 2, 0, 6, 0, 8,
0,15, 0,15, 0, 2, 0, 2,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 2, 0,15
};
#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)]
#endif
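The REGION macro above decodes the packing of the shapes table: each 64-entry row group interleaves four shapes, four columns wide each. A small sketch (an illustration, not part of this commit) that prints the 4x4 partition of any shape index:

#include <cstdio>

// Print the region id (0 or 1) of each pixel in the 4x4 tile for shape si.
static void dump_shape( int si )
{
    for (int y = 0; y < 4; ++y)
    {
        for (int x = 0; x < 4; ++x)
            printf( "%d", REGION( x, y, si ) );
        printf( "\n" );
    }
}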

82
3rdparty/nvtt/bc6h/tile.h vendored Normal file
View File

@@ -0,0 +1,82 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _ZOH_TILE_H
#define _ZOH_TILE_H
#include "zoh_utils.h"
#include "nvmath/vector.h"
#include <math.h>
namespace ZOH {
//#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile
class Tile
{
public:
// NOTE: this returns the appropriately-clamped BIT PATTERN of the half as an INTEGRAL float value
static float half2float(uint16 h)
{
return (float) Utils::ushort_to_format(h);
}
// NOTE: this is the inverse of the above operation
static uint16 float2half(float f)
{
return Utils::format_to_ushort((int)f);
}
// look for adjacent pixels that are identical. if there are enough of them, increase their importance
void generate_importance_map()
{
// initialize
for (int y=0; y<size_y; ++y)
for (int x=0; x<size_x; ++x)
{
// my importance is increased if I am identical to any of my 4-neighbors
importance_map[y][x] = match_4_neighbor(x,y) ? 5.0f : 1.0f;
}
}
bool is_equal(int x, int y, int xn, int yn)
{
if (xn < 0 || xn >= size_x || yn < 0 || yn >= size_y)
return false;
return( (data[y][x].x == data[yn][xn].x) &&
(data[y][x].y == data[yn][xn].y) &&
(data[y][x].z == data[yn][xn].z) );
}
#ifdef USE_IMPORTANCE_MAP
bool match_4_neighbor(int x, int y)
{
return is_equal(x,y,x-1,y) || is_equal(x,y,x+1,y) || is_equal(x,y,x,y-1) || is_equal(x,y,x,y+1);
}
#else
bool match_4_neighbor(int, int)
{
return false;
}
#endif
Tile() {};
~Tile(){};
Tile(int xs, int ys) {size_x = xs; size_y = ys;}
static const int TILE_H = 4;
static const int TILE_W = 4;
static const int TILE_TOTAL = TILE_H * TILE_W;
nv::Vector3 data[TILE_H][TILE_W];
float importance_map[TILE_H][TILE_W];
int size_x, size_y; // actual size of tile
};
}
#endif // _ZOH_TILE_H
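A worked example of the bit-pattern convention above (an illustration, not part of this commit; it assumes Utils::FORMAT has been set to UNSIGNED_F16, and uint16 comes from nvcore): the half value 1.0 has bit pattern 0x3C00, and it is that pattern, not the numeric value, that the tile carries as a float.

ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16;
uint16 h = 0x3C00;                        // bit pattern of the half value 1.0
float  f = ZOH::Tile::half2float( h );    // == 15360.0f, the pattern as an integral float
uint16 r = ZOH::Tile::float2half( f );    // == 0x3C00 again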

197
3rdparty/nvtt/bc6h/zoh.cpp vendored Normal file
View File

@@ -0,0 +1,197 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// the zoh compressor and decompressor
#include "tile.h"
#include "zoh.h"
#include <string.h> // memcpy
using namespace ZOH;
bool ZOH::isone(const char *block)
{
char code = block[0] & 0x1F;
return (code == 0x03 || code == 0x07 || code == 0x0b || code == 0x0f);
}
void ZOH::compress(const Tile &t, char *block)
{
char oneblock[ZOH::BLOCKSIZE], twoblock[ZOH::BLOCKSIZE];
float mseone = ZOH::compressone(t, oneblock);
float msetwo = ZOH::compresstwo(t, twoblock);
if (mseone <= msetwo)
memcpy(block, oneblock, ZOH::BLOCKSIZE);
else
memcpy(block, twoblock, ZOH::BLOCKSIZE);
}
void ZOH::decompress(const char *block, Tile &t)
{
if (ZOH::isone(block))
ZOH::decompressone(block, t);
else
ZOH::decompresstwo(block, t);
}
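// Usage sketch (an illustration, not part of the original file): fill a tile
// with clamped half bit patterns, compress it into one 16-byte block, and
// decompress the block back into a tile.
//
//   ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16;
//   ZOH::Tile t(4, 4);
//   // ... fill t.data[y][x], then call t.generate_importance_map() ...
//   char block[ZOH::BLOCKSIZE];
//   ZOH::compress(t, block);     // tries one- and two-region, keeps lower MSE
//   ZOH::Tile out(4, 4);
//   ZOH::decompress(block, out); // routes on isone(block)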
/*
void ZOH::compress(string inf, string zohf)
{
Array2D<Rgba> pixels;
int w, h;
char block[ZOH::BLOCKSIZE];
Exr::readRgba(inf, pixels, w, h);
FILE *zohfile = fopen(zohf.c_str(), "wb");
if (zohfile == NULL) throw "Unable to open .zoh file for write";
// stuff for progress bar O.o
int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
int tilecnt = 0;
int ndots = 25;
int dotcnt = 0;
printf("Progress [");
for (int i=0; i<ndots;++i) printf(" ");
printf("]\rProgress ["); fflush(stdout);
// convert to tiles and compress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
t.insert(pixels, x, y);
ZOH::compress(t, block);
if (fwrite(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
throw "File error on write";
// progress bar
++tilecnt;
if (tilecnt > (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; }
}
}
printf("]\n"); // advance to next line finally
if (fclose(zohfile)) throw "Close failed on .zoh file";
}
static int str2int(std::string s)
{
int thing;
std::stringstream str (stringstream::in | stringstream::out);
str << s;
str >> thing;
return thing;
}
// zoh file name is ...-w-h.zoh, extract width and height
static void extract(string zohf, int &w, int &h)
{
size_t n = zohf.rfind('.', zohf.length()-1);
size_t n1 = zohf.rfind('-', n-1);
size_t n2 = zohf.rfind('-', n1-1);
string width = zohf.substr(n2+1, n1-n2-1);
w = str2int(width);
string height = zohf.substr(n1+1, n-n1-1);
h = str2int(height);
}
static int mode_to_prec[] = {
10,7,11,10,
10,7,11,11,
10,7,11,12,
10,7,9,16,
10,7,8,-1,
10,7,8,-1,
10,7,8,-1,
10,7,6,-1,
};
static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions;
static void stats(char block[ZOH::BLOCKSIZE])
{
char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++;
int prec = mode_to_prec[mode];
nvAssert (prec != -1);
if (!ZOH::isone(block))
{
tworegions++;
prechisttwo[prec]++;
int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3);
shapeindexhist[shapeindex]++;
}
else
{
oneregion++;
prechistone[prec]++;
}
}
static void printstats()
{
printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]);
printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]);
printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]);
printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]);
printf("\nOne region %5.2f%% Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions));
printf("\n");
}
void ZOH::decompress(string zohf, string outf)
{
Array2D<Rgba> pixels;
int w, h;
char block[ZOH::BLOCKSIZE];
extract(zohf, w, h);
FILE *zohfile = fopen(zohf.c_str(), "rb");
if (zohfile == NULL) throw "Unable to open .zoh file for read";
pixels.resizeErase(h, w);
// convert to tiles and decompress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
if (fread(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
throw "File error on read";
stats(block); // collect statistics
ZOH::decompress(block, t);
t.extract(pixels, x, y);
}
}
if (fclose(zohfile)) throw "Close failed on .zoh file";
Exr::writeRgba(outf, pixels, w, h);
#ifndef EXTERNAL_RELEASE
printstats(); // print statistics
#endif
}
*/

65
3rdparty/nvtt/bc6h/zoh.h vendored Normal file
View File

@@ -0,0 +1,65 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#pragma once
#ifndef _ZOH_H
#define _ZOH_H
#include "tile.h"
namespace ZOH {
// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f
static const int NREGIONS_TWO = 2;
static const int NREGIONS_ONE = 1;
static const int NCHANNELS = 3;
struct FltEndpts
{
nv::Vector3 A;
nv::Vector3 B;
};
struct IntEndpts
{
int A[NCHANNELS];
int B[NCHANNELS];
};
struct ComprEndpts
{
uint A[NCHANNELS];
uint B[NCHANNELS];
};
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
void compress(const Tile &t, char *block);
void decompress(const char *block, Tile &t);
float compressone(const Tile &t, char *block);
float compresstwo(const Tile &t, char *block);
void decompressone(const char *block, Tile &t);
void decompresstwo(const char *block, Tile &t);
float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block);
float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]);
float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block);
float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]);
bool isone(const char *block);
}
#endif // _ZOH_H

324
3rdparty/nvtt/bc6h/zoh_utils.cpp vendored Normal file
View File

@@ -0,0 +1,324 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// Utility and common routines
#include "zoh_utils.h"
#include "nvmath/vector.inl"
#include <math.h>
using namespace nv;
using namespace ZOH;
static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64
static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64
/*static*/ Format Utils::FORMAT;
int Utils::lerp(int a, int b, int i, int denom)
{
nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
nvDebugCheck (i >= 0 && i <= denom);
int round = 32, shift = 6;
const int *weights;
switch(denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break;
default: nvDebugCheck(0);
}
return (a*weights[denom-i] +b*weights[i] + round) >> shift;
}
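// Worked example: lerp(0, 64, 3, 7) picks denom7 weights 37 and 27, giving
// (0*37 + 64*27 + 32) >> 6 == 27, i.e. exactly 64 * 27/64.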
Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom)
{
nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
nvDebugCheck (i >= 0 && i <= denom);
int shift = 6;
const int *weights;
switch(denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break;
default: nvUnreachable();
}
// no need to round these as this is an exact division
return (a*float(weights[denom-i]) +b*float(weights[i])) / float(1 << shift);
}
/*
For unsigned f16, clamp the input to [0,F16MAX]. Thus u15.
For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16.
The conversions proceed as follows:
unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX.
signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value
unsigned int: get bits. return as a positive value.
signed int. get bits. return as a value in -32768..32767.
The inverse conversions are just the inverse of the above.
*/
// clamp the 3 channels of the input vector to the allowable range based on FORMAT
// note that each channel is a float storing the allowable range as a bit pattern converted to float
// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX]
void Utils::clamp(Vector3 &v)
{
for (int i=0; i<3; ++i)
{
switch(Utils::FORMAT)
{
case UNSIGNED_F16:
if (v.component[i] < 0.0) v.component[i] = 0;
else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
break;
case SIGNED_F16:
if (v.component[i] < -F16MAX) v.component[i] = -F16MAX;
else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
break;
default:
nvUnreachable();
}
}
}
// convert a u16 value to s17 (represented as an int) based on the format expected
int Utils::ushort_to_format(unsigned short input)
{
int out, s;
// clamp to the valid range we are expecting
switch (Utils::FORMAT)
{
case UNSIGNED_F16:
if (input & F16S_MASK) out = 0;
else if (input > F16MAX) out = F16MAX;
else out = input;
break;
case SIGNED_F16:
s = input & F16S_MASK;
input &= F16EM_MASK;
if (input > F16MAX) out = F16MAX;
else out = input;
out = s ? -out : out;
break;
}
return out;
}
// convert a s17 value to u16 based on the format expected
unsigned short Utils::format_to_ushort(int input)
{
unsigned short out;
// clamp to the valid range we are expecting
switch (Utils::FORMAT)
{
case UNSIGNED_F16:
nvDebugCheck (input >= 0 && input <= F16MAX);
out = input;
break;
case SIGNED_F16:
nvDebugCheck (input >= -F16MAX && input <= F16MAX);
// convert to sign-magnitude
int s;
if (input < 0) { s = F16S_MASK; input = -input; }
else { s = 0; }
out = s | input;
break;
}
return out;
}
// quantize the input range into equal-sized bins
int Utils::quantize(float value, int prec)
{
int q, ivalue, s;
nvDebugCheck (prec > 1); // didn't bother to make it work for 1
value = (float)floor(value + 0.5);
int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0; // bias precisions 11..16 to get a more accurate quantization
switch (Utils::FORMAT)
{
case UNSIGNED_F16:
nvDebugCheck (value >= 0 && value <= F16MAX);
ivalue = (int)value;
q = ((ivalue << prec) + bias) / (F16MAX+1);
nvDebugCheck (q >= 0 && q < (1 << prec));
break;
case SIGNED_F16:
nvDebugCheck (value >= -F16MAX && value <= F16MAX);
// convert to sign-magnitude
ivalue = (int)value;
if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0;
q = ((ivalue << (prec-1)) + bias) / (F16MAX+1);
if (s)
q = -q;
nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
break;
}
return q;
}
int Utils::finish_unquantize(int q, int prec)
{
if (Utils::FORMAT == UNSIGNED_F16)
return (q * 31) >> 6; // scale the magnitude by 31/64
else if (Utils::FORMAT == SIGNED_F16)
return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5; // scale the magnitude by 31/32
else
return q;
}
// unquantize each bin to midpoint of original bin range, except
// for the end bins which we push to an endpoint of the bin range.
// we do this to ensure we can represent all possible original values.
// the asymmetric end bins do not affect PSNR for the test images.
//
// code this function assuming an arbitrary bit pattern as the encoded block
int Utils::unquantize(int q, int prec)
{
int unq, s;
nvDebugCheck (prec > 1); // not implemented for prec 1
switch (Utils::FORMAT)
{
// modify this case to move the multiplication by 31 after interpolation.
// Need to use finish_unquantize.
// since we have 16 bits available, let's unquantize this to 16 bits unsigned
// thus the scale factor is [0-7c00)/[0-10000) = 31/64
case UNSIGNED_F16:
if (prec >= 15)
unq = q;
else if (q == 0)
unq = 0;
else if (q == ((1<<prec)-1))
unq = U16MAX;
else
unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
break;
// here, let's stick with S16 (no apparent quality benefit from going to S17)
// range is (-7c00..7c00)/(-8000..8000) = 31/32
case SIGNED_F16:
// don't remove this test even though it appears equivalent to the code below
// as it isn't -- the code below can overflow for prec = 16
if (prec >= 16)
unq = q;
else
{
if (q < 0) { s = 1; q = -q; } else s = 0;
if (q == 0)
unq = 0;
else if (q >= ((1<<(prec-1))-1))
unq = s ? -S16MAX : S16MAX;
else
{
unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
if (s)
unq = -unq;
}
}
break;
}
return unq;
}
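// Worked example (UNSIGNED_F16, prec = 10): quantize(31743.0f, 10) puts F16MAX
// in the top bin, q = 1023; unquantize(1023, 10) pushes that end bin to U16MAX
// (0xffff); and finish_unquantize scales by 31/64, (0xffff*31) >> 6 == 31743,
// recovering F16MAX exactly -- the lossless end-bin behavior described above.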
// pick a norm!
#define NORM_EUCLIDEAN 1
float Utils::norm(const Vector3 &a, const Vector3 &b)
{
#ifdef NORM_EUCLIDEAN
return lengthSquared(a - b);
#endif
#ifdef NORM_ABS
Vector3 err = a - b;
return fabs(err.x) + fabs(err.y) + fabs(err.z);
#endif
}
// parse <name>[<start>{:<end>}]{,}
// the pointer starts here ^
// name is 1 or 2 chars and matches field names. start and end are decimal numbers
void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len)
{
if (ptr <= 0) return;
--ptr;
if (encoding[ptr] == ',') --ptr;
nvDebugCheck (encoding[ptr] == ']');
--ptr;
endbit = 0;
int scale = 1;
while (encoding[ptr] != ':' && encoding[ptr] != '[')
{
nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
endbit += (encoding[ptr--] - '0') * scale;
scale *= 10;
}
int startbit = 0; scale = 1;
if (encoding[ptr] == '[')
startbit = endbit;
else
{
ptr--;
while (encoding[ptr] != '[')
{
nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
startbit += (encoding[ptr--] - '0') * scale;
scale *= 10;
}
}
len = startbit - endbit + 1; // note: startbit >= endbit
--ptr;
if (encoding[ptr] == 'm') field = FIELD_M;
else if (encoding[ptr] == 'd') field = FIELD_D;
else {
// it's wxyz
nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z');
int foo = encoding[ptr--] - 'w';
// now it is r g or b
if (encoding[ptr] == 'r') foo += 10;
else if (encoding[ptr] == 'g') foo += 20;
else if (encoding[ptr] == 'b') foo += 30;
else nvDebugCheck(0);
field = (Field) foo;
}
}
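// Worked example: with an encoding ending in "...rw[9:0],m[4:0]" and ptr at
// the end of the string, the first parse() call returns field = FIELD_M,
// endbit = 0, len = 5; the next returns FIELD_RW, endbit = 0, len = 10 --
// the string is consumed from the right, matching the order in which the
// header writers emit bits.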

72
3rdparty/nvtt/bc6h/zoh_utils.h vendored Normal file
View File

@@ -0,0 +1,72 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// utility class holding common routines
#ifndef _ZOH_UTILS_H
#define _ZOH_UTILS_H
#include "nvmath/vector.h"
namespace ZOH {
inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); }
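// e.g. SIGN_EXTEND(0x1F, 5) == -1, SIGN_EXTEND(0x0F, 5) == 15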
enum Field {
FIELD_M = 1, // mode
FIELD_D = 2, // distribution/shape
FIELD_RW = 10+0, FIELD_RX = 10+1, FIELD_RY = 10+2, FIELD_RZ = 10+3, // red channel endpoints or deltas
FIELD_GW = 20+0, FIELD_GX = 20+1, FIELD_GY = 20+2, FIELD_GZ = 20+3, // green channel endpoints or deltas
FIELD_BW = 30+0, FIELD_BX = 30+1, FIELD_BY = 30+2, FIELD_BZ = 30+3, // blue channel endpoints or deltas
};
// some constants
static const int F16S_MASK = 0x8000; // f16 sign mask
static const int F16EM_MASK = 0x7fff; // f16 exp & mantissa mask
static const int U16MAX = 0xffff;
static const int S16MIN = -0x8000;
static const int S16MAX = 0x7fff;
static const int INT16_MASK = 0xffff;
static const int F16MAX = 0x7bff; // MAXFLT bit pattern for halfs
enum Format { UNSIGNED_F16, SIGNED_F16 };
class Utils
{
public:
static Format FORMAT; // this is a global -- we're either handling unsigned or unsigned half values
// error metrics
static float norm(const nv::Vector3 &a, const nv::Vector3 &b);
static float mpsnr_norm(const nv::Vector3 &a, int exposure, const nv::Vector3 &b);
// conversion & clamp
static int ushort_to_format(unsigned short input);
static unsigned short format_to_ushort(int input);
// clamp to format
static void clamp(nv::Vector3 &v);
// quantization and unquantization
static int finish_unquantize(int q, int prec);
static int unquantize(int q, int prec);
static int quantize(float value, int prec);
static void parse(const char *encoding, int &ptr, Field & field, int &endbit, int &len);
// lerping
static int lerp(int a, int b, int i, int denom);
static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom);
};
}
#endif // _ZOH_UTILS_H

799
3rdparty/nvtt/bc6h/zohone.cpp vendored Normal file
View File

@@ -0,0 +1,799 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// one region zoh compress/decompress code
// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
#include "bits.h"
#include "tile.h"
#include "zoh.h"
#include "zoh_utils.h"
#include "nvmath/vector.inl"
#include "nvmath/fitting.h"
#include <string.h> // strlen
#include <float.h> // FLT_MAX
using namespace nv;
using namespace ZOH;
#define NINDICES 16
#define INDEXBITS 4
#define HIGH_INDEXBIT (1<<(INDEXBITS-1))
#define DENOM (NINDICES-1)
#define NSHAPES 1
static const int shapes[NSHAPES] =
{
0x0000
}; // only 1 shape
#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
#define POS_TO_X(pos) ((pos)&3)
#define POS_TO_Y(pos) (((pos)>>2)&3)
#define NDELTA 2
struct Chanpat
{
int prec[NDELTA]; // precision pattern for one channel
};
struct Pattern
{
Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
int mode; // associated mode value
int modebits; // number of mode bits
const char *encoding; // verilog description of encoding for this mode
};
#define MAXMODEBITS 5
#define MAXMODES (1<<MAXMODEBITS)
#define NPATTERNS 4
static const Pattern patterns[NPATTERNS] =
{
16,4, 16,4, 16,4, 1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
12,8, 12,8, 12,8, 1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
11,9, 11,9, 11,9, 1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
10,10, 10,10, 10,10, 0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
};
// mapping of mode to the corresponding index in pattern
static const int mode_to_pat[MAXMODES] = {
-1,-1,-1,
3, // 0x03
-1,-1,-1,
2, // 0x07
-1,-1,-1,
1, // 0x0b
-1,-1,-1,
0, // 0x0f
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
#define R_0(ep) (ep)[0].A[i]
#define R_1(ep) (ep)[0].B[i]
#define MASK(n) ((1<<(n))-1)
// compress endpoints
static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
{
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
}
}
}
// decompress endpoints
static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
{
bool issigned = Utils::FORMAT == SIGNED_F16;
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
int t;
t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
}
}
}
static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
{
for (int region = 0; region < NREGIONS_ONE; ++region)
{
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
}
}
// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
// position 0 is pixel x=0 y=0 and position 15 is pixel x=3 y=3, so y = (pos >> 2) & 3 and x = pos & 3
static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
{
int index_positions[NREGIONS_ONE];
index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0
for (int region = 0; region < NREGIONS_ONE; ++region)
{
int x = index_positions[region] & 3;
int y = (index_positions[region] >> 2) & 3;
nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT)
{
// high bit is set, swap the endpts and indices for this region
int t;
for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
if (REGION(x,y,shapeindex) == region)
indices[y][x] = NINDICES - 1 - indices[y][x];
}
}
}
// endpoints fit only if the compression was lossless
static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
{
IntEndpts uncompressed[NREGIONS_ONE];
decompress_endpts(compressed, uncompressed, p);
for (int j=0; j<NREGIONS_ONE; ++j)
for (int i=0; i<NCHANNELS; ++i)
{
if (orig[j].A[i] != uncompressed[j].A[i]) return false;
if (orig[j].B[i] != uncompressed[j].B[i]) return false;
}
return true;
}
static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
{
// interpret the verilog backwards and process it
int m = p.mode;
int rw = endpts[0].A[0], rx = endpts[0].B[0];
int gw = endpts[0].A[1], gx = endpts[0].B[1];
int bw = endpts[0].A[2], bx = endpts[0].B[2];
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: out.write( m >> endbit, len); break;
case FIELD_RW: out.write(rw >> endbit, len); break;
case FIELD_RX: out.write(rx >> endbit, len); break;
case FIELD_GW: out.write(gw >> endbit, len); break;
case FIELD_GX: out.write(gx >> endbit, len); break;
case FIELD_BW: out.write(bw >> endbit, len); break;
case FIELD_BX: out.write(bx >> endbit, len); break;
case FIELD_D:
case FIELD_RY:
case FIELD_RZ:
case FIELD_GY:
case FIELD_GZ:
case FIELD_BY:
case FIELD_BZ:
default: nvUnreachable();
}
}
}
static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
{
// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
int mode = in.read(2);
if (mode != 0x00 && mode != 0x01)
mode = (in.read(3) << 2) | mode;
int pat_index = mode_to_pat[mode];
nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index];
int d;
int rw, rx;
int gw, gx;
int bw, bx;
d = 0;
rw = rx = 0;
gw = gx = 0;
bw = bx = 0;
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: break; // already processed so ignore
case FIELD_RW: rw |= in.read(len) << endbit; break;
case FIELD_RX: rx |= in.read(len) << endbit; break;
case FIELD_GW: gw |= in.read(len) << endbit; break;
case FIELD_GX: gx |= in.read(len) << endbit; break;
case FIELD_BW: bw |= in.read(len) << endbit; break;
case FIELD_BX: bx |= in.read(len) << endbit; break;
case FIELD_D:
case FIELD_RY:
case FIELD_RZ:
case FIELD_GY:
case FIELD_GZ:
case FIELD_BY:
case FIELD_BZ:
default: nvUnreachable();
}
}
nvDebugCheck (in.getptr() == 128 - 63);
endpts[0].A[0] = rw; endpts[0].B[0] = rx;
endpts[0].A[1] = gw; endpts[0].B[1] = gx;
endpts[0].A[2] = bw; endpts[0].B[2] = bx;
}
// compress index 0
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
}
}
static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
Bits out(block, ZOH::BITSIZE);
write_header(endpts, p, out);
write_indices(indices, shapeindex, out);
nvDebugCheck(out.getptr() == ZOH::BITSIZE);
}
static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
{
// scale endpoints
int a, b; // really need a IntVector3...
a = Utils::unquantize(endpts.A[0], prec);
b = Utils::unquantize(endpts.B[0], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[1], prec);
b = Utils::unquantize(endpts.B[1], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[2], prec);
b = Utils::unquantize(endpts.B[2], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
}
// position 0 was compressed
static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
{
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
}
}
void ZOH::decompressone(const char *block, Tile &t)
{
Bits in(block, ZOH::BITSIZE);
Pattern p;
IntEndpts endpts[NREGIONS_ONE];
ComprEndpts compr_endpts[NREGIONS_ONE];
read_header(in, compr_endpts, p);
int shapeindex = 0; // only one shape
decompress_endpts(compr_endpts, endpts, p);
Vector3 palette[NREGIONS_ONE][NINDICES];
for (int r = 0; r < NREGIONS_ONE; ++r)
generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
// read indices
int indices[Tile::TILE_H][Tile::TILE_W];
read_indices(in, shapeindex, indices);
nvDebugCheck(in.getptr() == ZOH::BITSIZE);
// lookup
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
{
Vector3 palette[NINDICES];
float toterr = 0;
Vector3 err;
generate_palette_quantized(endpts, prec, palette);
for (int i = 0; i < np; ++i)
{
float err, besterr;
besterr = Utils::norm(colors[i], palette[0]) * importance[i];
for (int j = 1; j < NINDICES && besterr > 0; ++j)
{
err = Utils::norm(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec,
int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
{
// build list of possibles
Vector3 palette[NREGIONS_ONE][NINDICES];
for (int region = 0; region < NREGIONS_ONE; ++region)
{
generate_palette_quantized(endpts[region], prec, &palette[region][0]);
toterr[region] = 0;
}
Vector3 err;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]);
indices[y][x] = 0;
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]);
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
{
besterr = err;
indices[y][x] = i;
}
}
toterr[region] += besterr;
}
}
static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
float old_err, int do_b)
{
// we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts
// we have the temporary endpoints: temp_endpts
IntEndpts temp_endpts;
float min_err = old_err; // start with the best current error
int beststep;
// copy real endpoints so we can perturb them
for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
// do a logarithmic search for the best error for this endpoint (which)
for (int step = 1 << (prec-1); step; step >>= 1)
{
bool improved = false;
for (int sign = -1; sign <= 1; sign += 2)
{
if (do_b == 0)
{
temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
continue;
}
else
{
temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
continue;
}
float err = map_colors(colors, importance, np, temp_endpts, prec);
if (err < min_err)
{
improved = true;
min_err = err;
beststep = sign * step;
}
}
// if this was an improvement, move the endpoint and continue search from there
if (improved)
{
if (do_b == 0)
new_endpts.A[ch] += beststep;
else
new_endpts.B[ch] += beststep;
}
}
return min_err;
}
static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
{
float opt_err = orig_err;
for (int ch = 0; ch < NCHANNELS; ++ch)
{
opt_endpts.A[ch] = orig_endpts.A[ch];
opt_endpts.B[ch] = orig_endpts.B[ch];
}
/*
err0 = perturb(rgb0, delta0)
err1 = perturb(rgb1, delta1)
if (err0 < err1)
if (err0 >= initial_error) break
rgb0 += delta0
next = 1
else
if (err1 >= initial_error) break
rgb1 += delta1
next = 0
initial_err = map()
for (;;)
err = perturb(next ? rgb1:rgb0, delta)
if (err >= initial_err) break
next? rgb1 : rgb0 += delta
initial_err = err
*/
IntEndpts new_a, new_b;
IntEndpts new_endpt;
int do_b;
// now optimize each channel separately
for (int ch = 0; ch < NCHANNELS; ++ch)
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minima
float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B
if (err0 < err1)
{
if (err0 >= opt_err)
continue;
opt_endpts.A[ch] = new_a.A[ch];
opt_err = err0;
do_b = 1; // do B next
}
else
{
if (err1 >= opt_err)
continue;
opt_endpts.B[ch] = new_b.B[ch];
opt_err = err1;
do_b = 0; // do A next
}
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
if (err >= opt_err)
break;
if (do_b == 0)
opt_endpts.A[ch] = new_endpt.A[ch];
else
opt_endpts.B[ch] = new_endpt.B[ch];
opt_err = err;
do_b = 1 - do_b; // now move the other endpoint
}
}
}
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE],
const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
for (int region=0; region<NREGIONS_ONE; ++region)
{
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
++np;
}
}
}
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
}
}
/* optimization algorithm
for each pattern
convert endpoints using pattern precision
assign indices and get initial error
compress indices (and possibly reorder endpoints)
transform endpoints
if transformed endpoints fit pattern
get original endpoints back
optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
compress new indices
transform new endpoints
if new endpoints fit pattern AND if error is improved
emit compressed block with new data
else
emit compressed block with original data // to try to preserve maximum endpoint precision
*/
float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
{
float orig_err[NREGIONS_ONE], opt_err[NREGIONS_ONE], orig_toterr, opt_toterr;
IntEndpts orig_endpts[NREGIONS_ONE], opt_endpts[NREGIONS_ONE];
ComprEndpts compr_orig[NREGIONS_ONE], compr_opt[NREGIONS_ONE];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
for (int sp = 0; sp < NPATTERNS; ++sp)
{
// precisions for all channels need to be the same
for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
swap_indices(orig_endpts, orig_indices, shapeindex_best);
compress_endpts(orig_endpts, compr_orig, patterns[sp]);
if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
{
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
swap_indices(opt_endpts, opt_indices, shapeindex_best);
compress_endpts(opt_endpts, compr_opt, patterns[sp]);
orig_toterr = opt_toterr = 0;
for (int i=0; i < NREGIONS_ONE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
{
emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
return opt_toterr;
}
else
{
// either it stopped fitting when we optimized it, or there was no improvement
// so go back to the unoptimized endpoints which we know will fit
emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
return orig_toterr;
}
}
}
nvAssert (false); // "No candidate found, should never happen (refineone.)";
return FLT_MAX;
}
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
{
for (int region = 0; region < NREGIONS_ONE; ++region)
for (int i = 0; i < NINDICES; ++i)
palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
}
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
{
// build list of possibles
Vector3 palette[NREGIONS_ONE][NINDICES];
generate_palette_unquantized(endpts, palette);
float toterr = 0;
Vector3 err;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
{
for (int region=0; region<NREGIONS_ONE; ++region)
{
int np = 0;
Vector3 colors[Tile::TILE_TOTAL];
Vector3 mean(0,0,0);
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x,y,shapeindex) == region)
{
colors[np] = tile.data[y][x];
mean += tile.data[y][x];
++np;
}
}
}
// handle simple cases
if (np == 0)
{
Vector3 zero(0,0,0);
endpts[region].A = zero;
endpts[region].B = zero;
continue;
}
else if (np == 1)
{
endpts[region].A = colors[0];
endpts[region].B = colors[0];
continue;
}
else if (np == 2)
{
endpts[region].A = colors[0];
endpts[region].B = colors[1];
continue;
}
mean /= float(np);
Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
// project each pixel value along the principal direction
float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++)
{
float dp = dot(colors[i]-mean, direction);
if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp;
}
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction;
endpts[region].B = mean + maxp*direction;
// clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
// shape based on endpoints being clamped
Utils::clamp(endpts[region].A);
Utils::clamp(endpts[region].B);
}
return map_colors(tile, shapeindex, endpts);
}
float ZOH::compressone(const Tile &t, char *block)
{
int shapeindex_best = 0;
FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
float msebest = FLT_MAX;
/*
collect the mse values that are within 5% of the best values
optimize each one and choose the best
*/
// hack for now -- just use the best value WORK
for (int i=0; i<NSHAPES && msebest>0.0; ++i)
{
float mse = roughone(t, i, tempendpts);
if (mse < msebest)
{
msebest = mse;
shapeindex_best = i;
memcpy(endptsbest, tempendpts, sizeof(endptsbest));
}
}
return refineone(t, shapeindex_best, endptsbest, block);
}

883
3rdparty/nvtt/bc6h/zohtwo.cpp vendored Normal file
View File

@@ -0,0 +1,883 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// two regions zoh compress/decompress code
// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
/* optimization algorithm
get initial float endpoints
convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates.
note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible.
for each EC candidate in order from max precision to smaller precision
convert endpoints using the appropriate precision.
optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well.
(thus the endpoints and indices are in final form.)
transform and get bit delta.
if the bit delta fits, exit
if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever.
add a state variable and nvDebugCheck that we only do this once.
convert to bit stream.
return the error.
Global optimization
order all tiles based on their errors
do something special for high-error tiles
the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image...
display an image that shows partitioning and precision selected for each tile
*/
#include "bits.h"
#include "tile.h"
#include "zoh.h"
#include "zoh_utils.h"
#include "nvmath/fitting.h"
#include "nvmath/vector.inl"
#include <string.h> // strlen
#include <float.h> // FLT_MAX
using namespace nv;
using namespace ZOH;
#define NINDICES 8
#define INDEXBITS 3
#define HIGH_INDEXBIT (1<<(INDEXBITS-1))
#define DENOM (NINDICES-1)
// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
// i.e. can we search shapes in a particular order so we can see the global error minima easily and
// stop without having to touch all shapes?
#include "shapes_two.h"
// use only the first 32 available shapes
#undef NSHAPES
#undef SHAPEBITS
#define NSHAPES 32
#define SHAPEBITS 5
#define POS_TO_X(pos) ((pos)&3)
#define POS_TO_Y(pos) (((pos)>>2)&3)
#define NDELTA 4
struct Chanpat
{
int prec[NDELTA]; // precision pattern for one channel
};
struct Pattern
{
Chanpat chan[NCHANNELS]; // allow different bit patterns per channel -- but we still want constant precision per channel
int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
int mode; // associated mode value
int modebits; // number of mode bits
const char *encoding; // verilog description of encoding for this mode
};
#define MAXMODEBITS 5
#define MAXMODES (1<<MAXMODEBITS)
#define NPATTERNS 10
static const Pattern patterns[NPATTERNS] =
{
11,5,5,5, 11,4,4,4, 11,4,4,4, 1, 0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
11,4,4,4, 11,5,5,5, 11,4,4,4, 1, 0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
11,4,4,4, 11,4,4,4, 11,5,5,5, 1, 0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
10,5,5,5, 10,5,5,5, 10,5,5,5, 1, 0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]",
9,5,5,5, 9,5,5,5, 9,5,5,5, 1, 0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]",
8,6,6,6, 8,5,5,5, 8,5,5,5, 1, 0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]",
8,5,5,5, 8,6,6,6, 8,5,5,5, 1, 0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]",
8,5,5,5, 8,5,5,5, 8,6,6,6, 1, 0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]",
7,6,6,6, 7,6,6,6, 7,6,6,6, 1, 0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]",
6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]",
};
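// Editor's note (derived from struct Pattern above, not in the original source):
// each row lists the four precisions for red, green and blue, then transformed,
// mode and modebits. E.g. the first row is mode 0x02: an 11-bit base endpoint per
// channel with 5-bit red / 4-bit green / 4-bit blue signed deltas, followed by
// that mode's bit-packing string.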
// mapping of mode to the corresponding index in pattern
// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -- return -2 for these
static const int mode_to_pat[MAXMODES] = {
3, // 0x00
8, // 0x01
0, // 0x02
-1,-1,-1,
1, // 0x06
-1,-1,-1,
2, // 0x0a
-1,-1,-1,
4, // 0x0e
-1,-1,-1,
5, // 0x12
-2,-1,-1,
6, // 0x16
-2,-1,-1,
7, // 0x1a
-2,-1,-1,
9, // 0x1e
-2
};
#define R_0(ep) (ep)[0].A[i]
#define R_1(ep) (ep)[0].B[i]
#define R_2(ep) (ep)[1].A[i]
#define R_3(ep) (ep)[1].B[i]
#define MASK(n) ((1<<(n))-1)
// compress endpoints
static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p)
{
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
R_2(out) = (R_2(in) - R_0(in)) & MASK(p.chan[i].prec[2]);
R_3(out) = (R_3(in) - R_0(in)) & MASK(p.chan[i].prec[3]);
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
R_2(out) = R_2(in) & MASK(p.chan[i].prec[2]);
R_3(out) = R_3(in) & MASK(p.chan[i].prec[3]);
}
}
}
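// Editor's sketch (illustrative, not part of the original source): with the first
// pattern above (red precisions {11,5,5,5}) and p.transformed set, endpoints are
// stored as deltas from the base value R_0. For in[0].A[0] = 1000, in[0].B[0] = 1013:
//
//   R_0(out) = 1000 & MASK(11);          // base kept at full 11-bit precision
//   R_1(out) = (1013 - 1000) & MASK(5);  // 13, a 5-bit signed delta
//
// decompress_endpts() reverses this: sign-extend the delta, add the base, and mask
// back to 11 bits. The round trip is lossless only when every delta fits its
// precision, which is exactly what endpts_fit() checks before a pattern is used.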
// decompress endpoints
static void decompress_endpts(const ComprEndpts in[NREGIONS_TWO], IntEndpts out[NREGIONS_TWO], const Pattern &p)
{
bool issigned = Utils::FORMAT == SIGNED_F16;
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
int t;
t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
t = SIGN_EXTEND(R_2(in), p.chan[i].prec[2]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_2(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
t = SIGN_EXTEND(R_3(in), p.chan[i].prec[3]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_3(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
R_2(out) = issigned ? SIGN_EXTEND(R_2(in),p.chan[i].prec[2]) : R_2(in);
R_3(out) = issigned ? SIGN_EXTEND(R_3(in),p.chan[i].prec[3]) : R_3(in);
}
}
}
static void quantize_endpts(const FltEndpts endpts[NREGIONS_TWO], int prec, IntEndpts q_endpts[NREGIONS_TWO])
{
for (int region = 0; region < NREGIONS_TWO; ++region)
{
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
}
}
// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
static void swap_indices(IntEndpts endpts[NREGIONS_TWO], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
{
for (int region = 0; region < NREGIONS_TWO; ++region)
{
int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
int x = POS_TO_X(position);
int y = POS_TO_Y(position);
nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT)
{
// high bit is set, swap the endpts and indices for this region
int t;
for (int i=0; i<NCHANNELS; ++i)
{
t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
}
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
if (REGION(x,y,shapeindex) == region)
indices[y][x] = NINDICES - 1 - indices[y][x];
}
}
}
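// Editor's note (derived, not in the original source): flipping an index i to
// NINDICES-1-i mirrors the palette, so swapping A and B leaves the reconstruction
// unchanged:
//
//   lerp(A, B, i, DENOM) == lerp(B, A, DENOM - i, DENOM)   // DENOM == NINDICES-1
//
// That freedom is what lets the encoder force each region's anchor texel to have
// a 0 high-order index bit and drop one bit per region from the block.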
// endpoints fit only if the compression was lossless
static bool endpts_fit(const IntEndpts orig[NREGIONS_TWO], const ComprEndpts compressed[NREGIONS_TWO], const Pattern &p)
{
IntEndpts uncompressed[NREGIONS_TWO];
decompress_endpts(compressed, uncompressed, p);
for (int j=0; j<NREGIONS_TWO; ++j)
{
for (int i=0; i<NCHANNELS; ++i)
{
if (orig[j].A[i] != uncompressed[j].A[i]) return false;
if (orig[j].B[i] != uncompressed[j].B[i]) return false;
}
}
return true;
}
static void write_header(const ComprEndpts endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, Bits &out)
{
// interpret the verilog backwards and process it
int m = p.mode;
int d = shapeindex;
int rw = endpts[0].A[0], rx = endpts[0].B[0], ry = endpts[1].A[0], rz = endpts[1].B[0];
int gw = endpts[0].A[1], gx = endpts[0].B[1], gy = endpts[1].A[1], gz = endpts[1].B[1];
int bw = endpts[0].A[2], bx = endpts[0].B[2], by = endpts[1].A[2], bz = endpts[1].B[2];
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: out.write( m >> endbit, len); break;
case FIELD_D: out.write( d >> endbit, len); break;
case FIELD_RW: out.write(rw >> endbit, len); break;
case FIELD_RX: out.write(rx >> endbit, len); break;
case FIELD_RY: out.write(ry >> endbit, len); break;
case FIELD_RZ: out.write(rz >> endbit, len); break;
case FIELD_GW: out.write(gw >> endbit, len); break;
case FIELD_GX: out.write(gx >> endbit, len); break;
case FIELD_GY: out.write(gy >> endbit, len); break;
case FIELD_GZ: out.write(gz >> endbit, len); break;
case FIELD_BW: out.write(bw >> endbit, len); break;
case FIELD_BX: out.write(bx >> endbit, len); break;
case FIELD_BY: out.write(by >> endbit, len); break;
case FIELD_BZ: out.write(bz >> endbit, len); break;
default: nvUnreachable();
}
}
}
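// Editor's sketch of how one field of the encoding string is consumed (the exact
// behavior of Utils::parse lives in zoh_utils and is assumed here). The string is
// walked right to left, so the mode field at the end of each pattern is emitted first:
//
//   "rw[9:0]" -> field = FIELD_RW, endbit = 0, len = 10 -> out.write(rw >> 0, 10);
//   "bz[3]"   -> field = FIELD_BZ, endbit = 3, len = 1  -> out.write(bz >> 3, 1);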
static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
{
// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
int mode = in.read(2);
if (mode != 0x00 && mode != 0x01)
mode = (in.read(3) << 2) | mode;
int pat_index = mode_to_pat[mode];
if (pat_index == -2)
return false; // reserved mode found
nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index];
int d;
int rw, rx, ry, rz;
int gw, gx, gy, gz;
int bw, bx, by, bz;
d = 0;
rw = rx = ry = rz = 0;
gw = gx = gy = gz = 0;
bw = bx = by = bz = 0;
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: break; // already processed so ignore
case FIELD_D: d |= in.read(len) << endbit; break;
case FIELD_RW: rw |= in.read(len) << endbit; break;
case FIELD_RX: rx |= in.read(len) << endbit; break;
case FIELD_RY: ry |= in.read(len) << endbit; break;
case FIELD_RZ: rz |= in.read(len) << endbit; break;
case FIELD_GW: gw |= in.read(len) << endbit; break;
case FIELD_GX: gx |= in.read(len) << endbit; break;
case FIELD_GY: gy |= in.read(len) << endbit; break;
case FIELD_GZ: gz |= in.read(len) << endbit; break;
case FIELD_BW: bw |= in.read(len) << endbit; break;
case FIELD_BX: bx |= in.read(len) << endbit; break;
case FIELD_BY: by |= in.read(len) << endbit; break;
case FIELD_BZ: bz |= in.read(len) << endbit; break;
default: nvUnreachable();
}
}
nvDebugCheck (in.getptr() == 128 - 46);
shapeindex = d;
endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
return true;
}
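// Editor's note on the bit accounting above (derived, not from the source): a
// two-region tile has 16 texels at INDEXBITS == 3 bits each, minus one implicit
// high bit per region's anchor texel, i.e. 16*3 - 2 = 46 index bits. The header
// (mode, shape, endpoints) must therefore end exactly at bit 128 - 46 = 82, which
// is what the nvDebugCheck verifies.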
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{
int positions[NREGIONS_TWO];
for (int r = 0; r < NREGIONS_TWO; ++r)
positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
bool match = false;
for (int r = 0; r < NREGIONS_TWO; ++r)
if (positions[r] == pos) { match = true; break; }
out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
}
}
static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
Bits out(block, ZOH::BITSIZE);
write_header(compr_endpts, shapeindex, p, out);
write_indices(indices, shapeindex, out);
nvDebugCheck(out.getptr() == ZOH::BITSIZE);
}
static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
{
// scale endpoints
int a, b; // really need an IntVector3...
a = Utils::unquantize(endpts.A[0], prec);
b = Utils::unquantize(endpts.B[0], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[1], prec);
b = Utils::unquantize(endpts.B[1], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[2], prec);
b = Utils::unquantize(endpts.B[2], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
}
static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
{
int positions[NREGIONS_TWO];
for (int r = 0; r < NREGIONS_TWO; ++r)
positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
bool match = false;
for (int r = 0; r < NREGIONS_TWO; ++r)
if (positions[r] == pos) { match = true; break; }
indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
}
}
void ZOH::decompresstwo(const char *block, Tile &t)
{
Bits in(block, ZOH::BITSIZE);
Pattern p;
IntEndpts endpts[NREGIONS_TWO];
ComprEndpts compr_endpts[NREGIONS_TWO];
int shapeindex;
if (!read_header(in, compr_endpts, shapeindex, p))
{
// reserved mode, return all zeroes
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
t.data[y][x] = Vector3(0.0f);
return;
}
decompress_endpts(compr_endpts, endpts, p);
Vector3 palette[NREGIONS_TWO][NINDICES];
for (int r = 0; r < NREGIONS_TWO; ++r)
generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
int indices[Tile::TILE_H][Tile::TILE_W];
read_indices(in, shapeindex, indices);
nvDebugCheck(in.getptr() == ZOH::BITSIZE);
// lookup
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
{
Vector3 palette[NINDICES];
float toterr = 0;
generate_palette_quantized(endpts, prec, palette);
for (int i = 0; i < np; ++i)
{
float err, besterr;
besterr = Utils::norm(colors[i], palette[0]) * importance[i];
for (int j = 1; j < NINDICES && besterr > 0; ++j)
{
err = Utils::norm(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec,
int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO])
{
// build list of possibles
Vector3 palette[NREGIONS_TWO][NINDICES];
for (int region = 0; region < NREGIONS_TWO; ++region)
{
generate_palette_quantized(endpts[region], prec, &palette[region][0]);
toterr[region] = 0;
}
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]);
indices[y][x] = 0;
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]);
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
{
besterr = err;
indices[y][x] = i;
}
}
toterr[region] += besterr;
}
}
static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
float old_err, int do_b)
{
// we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts
// we have the temporary endpoints: temp_endpts
IntEndpts temp_endpts;
float min_err = old_err; // start with the best current error
int beststep;
// copy real endpoints so we can perturb them
for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
// do a logarithmic search for the best error for this endpoint (A or B, selected by do_b)
for (int step = 1 << (prec-1); step; step >>= 1)
{
bool improved = false;
for (int sign = -1; sign <= 1; sign += 2)
{
if (do_b == 0)
{
temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
continue;
}
else
{
temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
continue;
}
float err = map_colors(colors, importance, np, temp_endpts, prec);
if (err < min_err)
{
improved = true;
min_err = err;
beststep = sign * step;
}
}
// if this was an improvement, move the endpoint and continue search from there
if (improved)
{
if (do_b == 0)
new_endpts.A[ch] += beststep;
else
new_endpts.B[ch] += beststep;
}
}
return min_err;
}
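// Editor's note (illustrative): the loop above is a coarse-to-fine search. For
// prec == 10 it tries steps 512, 256, ..., 1 in both directions, keeping a step
// only when map_colors() reports a lower error, so roughly 2*prec error
// evaluations stand in for an exhaustive scan of all 2^prec values of one
// endpoint channel.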
static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
{
float opt_err = orig_err;
for (int ch = 0; ch < NCHANNELS; ++ch)
{
opt_endpts.A[ch] = orig_endpts.A[ch];
opt_endpts.B[ch] = orig_endpts.B[ch];
}
/*
err0 = perturb(rgb0, delta0)
err1 = perturb(rgb1, delta1)
if (err0 < err1)
if (err0 >= initial_error) break
rgb0 += delta0
next = 1
else
if (err1 >= initial_error) break
rgb1 += delta1
next = 0
initial_err = map()
for (;;)
err = perturb(next ? rgb1:rgb0, delta)
if (err >= initial_err) break
next? rgb1 : rgb0 += delta
initial_err = err
*/
IntEndpts new_a, new_b;
IntEndpts new_endpt;
int do_b;
// now optimize each channel separately
for (int ch = 0; ch < NCHANNELS; ++ch)
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minima
float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B
if (err0 < err1)
{
if (err0 >= opt_err)
continue;
opt_endpts.A[ch] = new_a.A[ch];
opt_err = err0;
do_b = 1; // do B next
}
else
{
if (err1 >= opt_err)
continue;
opt_endpts.B[ch] = new_b.B[ch];
opt_err = err1;
do_b = 0; // do A next
}
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
if (err >= opt_err)
break;
if (do_b == 0)
opt_endpts.A[ch] = new_endpt.A[ch];
else
opt_endpts.B[ch] = new_endpt.B[ch];
opt_err = err;
do_b = 1 - do_b; // now move the other endpoint
}
}
}
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO],
const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO])
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
for (int region=0; region<NREGIONS_TWO; ++region)
{
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
{
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
++np;
}
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
}
}
/* optimization algorithm
for each pattern
convert endpoints using pattern precision
assign indices and get initial error
compress indices (and possibly reorder endpoints)
transform endpoints
if transformed endpoints fit pattern
get original endpoints back
optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
compress new indices
transform new endpoints
if new endpoints fit pattern AND if error is improved
emit compressed block with new data
else
emit compressed block with original data // to try to preserve maximum endpoint precision
*/
float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block)
{
float orig_err[NREGIONS_TWO], opt_err[NREGIONS_TWO], orig_toterr, opt_toterr;
IntEndpts orig_endpts[NREGIONS_TWO], opt_endpts[NREGIONS_TWO];
ComprEndpts compr_orig[NREGIONS_TWO], compr_opt[NREGIONS_TWO];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
for (int sp = 0; sp < NPATTERNS; ++sp)
{
// precisions for all channels need to be the same
for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
swap_indices(orig_endpts, orig_indices, shapeindex_best);
compress_endpts(orig_endpts, compr_orig, patterns[sp]);
if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
{
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
swap_indices(opt_endpts, opt_indices, shapeindex_best);
compress_endpts(opt_endpts, compr_opt, patterns[sp]);
orig_toterr = opt_toterr = 0;
for (int i=0; i < NREGIONS_TWO; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
{
emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
return opt_toterr;
}
else
{
// either it stopped fitting when we optimized it, or there was no improvement
// so go back to the unoptimized endpoints which we know will fit
emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
return orig_toterr;
}
}
}
nvAssert(false); //throw "No candidate found, should never happen (refinetwo)";
return FLT_MAX;
}
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
{
for (int region = 0; region < NREGIONS_TWO; ++region)
for (int i = 0; i < NINDICES; ++i)
palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
}
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_TWO])
{
// build list of possibles
Vector3 palette[NREGIONS_TWO][NINDICES];
generate_palette_unquantized(endpts, palette);
float toterr = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO])
{
for (int region=0; region<NREGIONS_TWO; ++region)
{
int np = 0;
Vector3 colors[Tile::TILE_TOTAL];
Vector3 mean(0,0,0);
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
{
colors[np] = tile.data[y][x];
mean += tile.data[y][x];
++np;
}
// handle simple cases
if (np == 0)
{
Vector3 zero(0,0,0);
endpts[region].A = zero;
endpts[region].B = zero;
continue;
}
else if (np == 1)
{
endpts[region].A = colors[0];
endpts[region].B = colors[0];
continue;
}
else if (np == 2)
{
endpts[region].A = colors[0];
endpts[region].B = colors[1];
continue;
}
mean /= float(np);
Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
// project each pixel value along the principal direction
float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++)
{
float dp = dot(colors[i]-mean, direction);
if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp;
}
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction;
endpts[region].B = mean + maxp*direction;
// clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
// shape based on endpoints being clamped
Utils::clamp(endpts[region].A);
Utils::clamp(endpts[region].B);
}
return map_colors(tile, shapeindex, endpts);
}
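// Editor's note (derived summary, not in the source): roughtwo() is a PCA fit --
// the endpoints are the extremes of the region's colors projected onto the first
// principal axis:
//
//   dp = dot(color - mean, direction);  A = mean + minp*direction;  B = mean + maxp*direction;
//
// so every color's closest point on the line lies between A and B before clamping
// and quantization, a cheap but serviceable starting guess for refinetwo().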
float ZOH::compresstwo(const Tile &t, char *block)
{
int shapeindex_best = 0;
FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO];
float msebest = FLT_MAX;
/*
collect the mse values that are within 5% of the best values
optimize each one and choose the best
*/
// hack for now -- just use the best value WORK
for (int i=0; i<NSHAPES && msebest>0.0; ++i)
{
float mse = roughtwo(t, i, tempendpts);
if (mse < msebest)
{
msebest = mse;
shapeindex_best = i;
memcpy(endptsbest, tempendpts, sizeof(endptsbest));
}
}
return refinetwo(t, shapeindex_best, endptsbest, block);
}

264
3rdparty/nvtt/bc7/avpcl.cpp vendored Normal file
View File

@@ -0,0 +1,264 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// the avpcl compressor and decompressor
#include "tile.h"
#include "avpcl.h"
#include "nvcore/debug.h"
#include "nvmath/vector.inl"
#include <string.h>
#include <float.h>
using namespace nv;
using namespace AVPCL;
// global flags
bool AVPCL::flag_premult = false;
bool AVPCL::flag_nonuniform = false;
bool AVPCL::flag_nonuniform_ati = false;
// global mode
bool AVPCL::mode_rgb = false; // true if image had constant alpha = 255
void AVPCL::compress(const Tile &t, char *block)
{
char tempblock[AVPCL::BLOCKSIZE];
float msebest = FLT_MAX;
float mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
/*if (errfile)
{
float errs[21];
int nerrs = 8;
errs[0] = mse_mode0;
errs[1] = mse_mode1;
errs[2] = mse_mode2;
errs[3] = mse_mode3;
errs[4] = mse_mode4;
errs[5] = mse_mode5;
errs[6] = mse_mode6;
errs[7] = mse_mode7;
if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs)
throw "Write error on error file";
}*/
}
/*
static int getbit(char *b, int start)
{
if (start < 0 || start >= 128) return 0; // out of range
int ix = start >> 3;
return (b[ix] & (1 << (start & 7))) != 0;
}
static int getbits(char *b, int start, int len)
{
int out = 0;
for (int i=0; i<len; ++i)
out |= getbit(b, start+i) << i;
return out;
}
static void setbit(char *b, int start, int bit)
{
if (start < 0 || start >= 128) return; // out of range
int ix = start >> 3;
if (bit & 1)
b[ix] |= (1 << (start & 7));
else
b[ix] &= ~(1 << (start & 7));
}
static void setbits(char *b, int start, int len, int bits)
{
for (int i=0; i<len; ++i)
setbit(b, start+i, bits >> i);
}
*/
void AVPCL::decompress(const char *cblock, Tile &t)
{
char block[AVPCL::BLOCKSIZE];
memcpy(block, cblock, AVPCL::BLOCKSIZE);
switch(getmode(block))
{
case 0: AVPCL::decompress_mode0(block, t); break;
case 1: AVPCL::decompress_mode1(block, t); break;
case 2: AVPCL::decompress_mode2(block, t); break;
case 3: AVPCL::decompress_mode3(block, t); break;
case 4: AVPCL::decompress_mode4(block, t); break;
case 5: AVPCL::decompress_mode5(block, t); break;
case 6: AVPCL::decompress_mode6(block, t); break;
case 7: AVPCL::decompress_mode7(block, t); break;
case 8: // return a black tile if you get a reserved mode
for (int y=0; y<Tile::TILE_H; ++y)
for (int x=0; x<Tile::TILE_W; ++x)
t.data[y][x].set(0, 0, 0, 0);
break;
default: nvUnreachable();
}
}
/*
void AVPCL::compress(string inf, string avpclf, string errf)
{
Array2D<RGBA> pixels;
int w, h;
char block[AVPCL::BLOCKSIZE];
Targa::read(inf, pixels, w, h);
FILE *avpclfile = fopen(avpclf.c_str(), "wb");
if (avpclfile == NULL) throw "Unable to open .avpcl file for write";
FILE *errfile = NULL;
if (errf != "")
{
errfile = fopen(errf.c_str(), "wb");
if (errfile == NULL) throw "Unable to open error file for write";
}
// Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set)
if (AVPCL::flag_premult)
{
if (AVPCL::mode_rgb)
{
AVPCL::flag_premult = false;
cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." << endl << endl;
}
}
// stuff for progress bar O.o
int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
int tilecnt = 0;
clock_t start, prev, cur;
start = prev = clock();
// convert to tiles and compress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, float(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; }
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
t.insert(pixels, x, y);
AVPCL::compress(t, block, errfile);
if (fwrite(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
throw "File error on write";
// progress bar
++tilecnt;
}
}
cur = clock();
printf("\nTotal time to compress: %.2f seconds\n\n", float(cur-start)/CLOCKS_PER_SEC); // advance to next line finally
if (fclose(avpclfile)) throw "Close failed on .avpcl file";
if (errfile && fclose(errfile)) throw "Close failed on error file";
}
static int str2int(std::string s)
{
int thing;
std::stringstream str (stringstream::in | stringstream::out);
str << s;
str >> thing;
return thing;
}
// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height
static void extract(string avpclf, int &w, int &h, bool &mode_rgb)
{
size_t n = avpclf.rfind('.', avpclf.length()-1);
size_t n1 = avpclf.rfind('-', n-1);
size_t n2 = avpclf.rfind('-', n1-1);
size_t n3 = avpclf.rfind('-', n2-1);
// ...-wwww-hhhh-RGB[A].avpcl
// ^ ^ ^ ^
// n3 n2 n1 n n3<n2<n1<n
string width = avpclf.substr(n3+1, n2-n3-1);
w = str2int(width);
string height = avpclf.substr(n2+1, n1-n2-1);
h = str2int(height);
string mode = avpclf.substr(n1+1, n-n1-1);
mode_rgb = mode == "RGB";
}
static int modehist[8];
static void stats(char block[AVPCL::BLOCKSIZE])
{
int m = AVPCL::getmode(block);
modehist[m]++;
}
static void printstats()
{
printf("\nMode histogram: "); for (int i=0; i<8; ++i) { printf("%d,", modehist[i]); }
printf("\n");
}
void AVPCL::decompress(string avpclf, string outf)
{
Array2D<RGBA> pixels;
int w, h;
char block[AVPCL::BLOCKSIZE];
extract(avpclf, w, h, AVPCL::mode_rgb);
FILE *avpclfile = fopen(avpclf.c_str(), "rb");
if (avpclfile == NULL) throw "Unable to open .avpcl file for read";
pixels.resizeErase(h, w);
// convert to tiles and decompress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
throw "File error on read";
stats(block); // collect statistics
AVPCL::decompress(block, t);
t.extract(pixels, x, y);
}
}
if (fclose(avpclfile)) throw "Close failed on .avpcl file";
Targa::write(outf, pixels, w, h);
printstats(); // print statistics
}
*/

99
3rdparty/nvtt/bc7/avpcl.h vendored Normal file
View File

@@ -0,0 +1,99 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_H
#define _AVPCL_H
#include "tile.h"
#include "bits.h"
#define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression
#define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators
#define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions!
namespace AVPCL {
static const int NREGIONS_TWO = 2;
static const int NREGIONS_THREE = 3;
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
// global flags
extern bool flag_premult;
extern bool flag_nonuniform;
extern bool flag_nonuniform_ati;
// global mode
extern bool mode_rgb; // true if image had constant alpha = 255
void compress(const Tile &t, char *block);
void decompress(const char *block, Tile &t);
float compress_mode0(const Tile &t, char *block);
void decompress_mode0(const char *block, Tile &t);
float compress_mode1(const Tile &t, char *block);
void decompress_mode1(const char *block, Tile &t);
float compress_mode2(const Tile &t, char *block);
void decompress_mode2(const char *block, Tile &t);
float compress_mode3(const Tile &t, char *block);
void decompress_mode3(const char *block, Tile &t);
float compress_mode4(const Tile &t, char *block);
void decompress_mode4(const char *block, Tile &t);
float compress_mode5(const Tile &t, char *block);
void decompress_mode5(const char *block, Tile &t);
float compress_mode6(const Tile &t, char *block);
void decompress_mode6(const char *block, Tile &t);
float compress_mode7(const Tile &t, char *block);
void decompress_mode7(const char *block, Tile &t);
inline int getmode(Bits &in)
{
int mode = 0;
if (in.read(1)) mode = 0;
else if (in.read(1)) mode = 1;
else if (in.read(1)) mode = 2;
else if (in.read(1)) mode = 3;
else if (in.read(1)) mode = 4;
else if (in.read(1)) mode = 5;
else if (in.read(1)) mode = 6;
else if (in.read(1)) mode = 7;
else mode = 8; // reserved
return mode;
}
inline int getmode(const char *block)
{
int bits = block[0], mode = 0;
if (bits & 1) mode = 0;
else if ((bits&3) == 2) mode = 1;
else if ((bits&7) == 4) mode = 2;
else if ((bits & 0xF) == 8) mode = 3;
else if ((bits & 0x1F) == 16) mode = 4;
else if ((bits & 0x3F) == 32) mode = 5;
else if ((bits & 0x7F) == 64) mode = 6;
else if ((bits & 0xFF) == 128) mode = 7;
else mode = 8; // reserved
return mode;
}
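// Editor's note (derived from the two getmode() variants above): the BC7 mode
// number is a unary prefix code -- mode m is written as m zero bits followed by a
// one bit, read LSB-first. E.g. block[0] == 0x04 (binary 00000100) has two zeros
// before the first set bit, so getmode() returns 2; a block whose first byte is 0
// has no mode bit within 8 bits and decodes as the reserved mode 8.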
}
#endif

1066
3rdparty/nvtt/bc7/avpcl_mode0.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1047
3rdparty/nvtt/bc7/avpcl_mode1.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1004
3rdparty/nvtt/bc7/avpcl_mode2.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1059
3rdparty/nvtt/bc7/avpcl_mode3.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1214
3rdparty/nvtt/bc7/avpcl_mode4.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1216
3rdparty/nvtt/bc7/avpcl_mode5.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1055
3rdparty/nvtt/bc7/avpcl_mode6.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

1094
3rdparty/nvtt/bc7/avpcl_mode7.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

389
3rdparty/nvtt/bc7/avpcl_utils.cpp vendored Normal file
View File

@@ -0,0 +1,389 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// Utility and common routines
#include "avpcl_utils.h"
#include "avpcl.h"
#include "nvmath/vector.inl"
#include <math.h>
using namespace nv;
using namespace AVPCL;
static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64
static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64
int Utils::lerp(int a, int b, int i, int bias, int denom)
{
#ifdef USE_ZOH_INTERP
nvAssert (denom == 3 || denom == 7 || denom == 15);
nvAssert (i >= 0 && i <= denom);
nvAssert (bias >= 0 && bias <= denom/2);
nvAssert (a >= 0 && b >= 0);
int round = 0;
#ifdef USE_ZOH_INTERP_ROUNDED
round = 32;
#endif
switch (denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
default: nvUnreachable(); return 0;
}
#else
return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
#endif
}
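// Editor's note (illustrative): the "case 3" fallthrough works because the 2-bit
// weight table is an exact subsample of the 4-bit one -- i/3 == (5*i)/15 -- so
// indices 0,1,2,3 map to denom15_weights entries 0,5,10,15, i.e. weights 0,21,43,64.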
Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
{
#ifdef USE_ZOH_INTERP
nvAssert (denom == 3 || denom == 7 || denom == 15);
nvAssert (i >= 0 && i <= denom);
nvAssert (bias >= 0 && bias <= denom/2);
// nvAssert (a >= 0 && b >= 0);
// no need to bias these as this is an exact division
switch (denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
case 7: return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
default: nvUnreachable(); return Vector4(0);
}
#else
return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
#endif
}
int Utils::unquantize(int q, int prec)
{
int unq;
nvAssert (prec > 3); // we only want to do one replicate
#ifdef USE_ZOH_QUANT
if (prec >= 8)
unq = q;
else if (q == 0)
unq = 0;
else if (q == ((1<<prec)-1))
unq = 255;
else
unq = (q * 256 + 128) >> prec;
#else
// avpcl unquantizer -- bit replicate
unq = (q << (8-prec)) | (q >> (2*prec-8));
#endif
return unq;
}
// quantize to the best value -- i.e., minimize unquantize error
int Utils::quantize(float value, int prec)
{
int q, unq;
nvAssert (prec > 3); // we only want to do one replicate
unq = (int)floor(value + 0.5f);
nvAssert (unq <= 255);
#ifdef USE_ZOH_QUANT
q = (prec >= 8) ? unq : (unq << prec) / 256;
#else
// avpcl quantizer -- scale properly for best possible bit-replicated result
q = (unq * ((1<<prec)-1) + 127)/255;
#endif
nvAssert (q >= 0 && q < (1 << prec));
return q;
}
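// Editor's sketch (not part of the source): on the avpcl bit-replicate path above,
// replication spreads the quantized range over [0,255] so 0 -> 0 and the maximum
// code -> 255. For prec == 5:
//
//   int q   = 0x15;                    // 10101b
//   int unq = (q << 3) | (q >> 2);     // 10101101b == 0xAD == 173
//
// and quantize(173.0f, 5) == (173*31 + 127)/255 == 21 == 0x15, so the pair
// round-trips exactly.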
float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
{
Vector4 err = a - b;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt weights go.
float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
{
Vector3 err = a - b;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// adjust weights based on rotatemode
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA: break;
case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
default: nvUnreachable();
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric1(const float a, const float b, int rotatemode)
{
float err = a - b;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt, awt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// adjust weights based on rotatemode
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
case ROTATEMODE_RGBA_RABG: awt = gwt; break;
case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
default: nvUnreachable();
}
// weigh the components
err *= awt;
}
return err * err;
}
float Utils::premult(float r, float a)
{
// note that the args are really integers stored in floats
int R = int(r), A = int(a);
nvAssert ((R==r) && (A==a));
return float((R*A + 127)/255);
}
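// Editor's worked example (illustrative): (R*A + 127)/255 is integer scaling with
// round-to-nearest, e.g. premult(128.0f, 255.0f) == (32640+127)/255 == 128 and
// premult(128.0f, 128.0f) == (16384+127)/255 == 64.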
static void premult4(Vector4& rgba)
{
rgba.x = Utils::premult(rgba.x, rgba.w);
rgba.y = Utils::premult(rgba.y, rgba.w);
rgba.z = Utils::premult(rgba.z, rgba.w);
}
static void premult3(Vector3& rgb, float a)
{
rgb.x = Utils::premult(rgb.x, a);
rgb.y = Utils::premult(rgb.y, a);
rgb.z = Utils::premult(rgb.z, a);
}
float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
{
Vector4 pma = a, pmb = b;
premult4(pma);
premult4(pmb);
Vector4 err = pma - pmb;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
{
Vector3 pma = rgb0, pmb = rgb1;
premult3(pma, a0);
premult3(pmb, a1);
Vector3 err = pma - pmb;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
{
Vector3 pma = rgb0, pmb = rgb1;
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA:
// this function isn't supposed to be called for this rotatemode
nvUnreachable();
break;
case ROTATEMODE_RGBA_AGBR:
pma.y = premult(pma.y, pma.x);
pma.z = premult(pma.z, pma.x);
pmb.y = premult(pmb.y, pmb.x);
pmb.z = premult(pmb.z, pmb.x);
break;
case ROTATEMODE_RGBA_RABG:
pma.x = premult(pma.x, pma.y);
pma.z = premult(pma.z, pma.y);
pmb.x = premult(pmb.x, pmb.y);
pmb.z = premult(pmb.z, pmb.y);
break;
case ROTATEMODE_RGBA_RGAB:
pma.x = premult(pma.x, pma.z);
pma.y = premult(pma.y, pma.z);
pmb.x = premult(pmb.x, pmb.z);
pmb.y = premult(pmb.y, pmb.z);
break;
default: nvUnreachable();
}
Vector3 err = pma - pmb;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
{
float err = premult(rgb0, a0) - premult(rgb1, a1);
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt, awt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// adjust weights based on rotatemode
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
case ROTATEMODE_RGBA_RABG: awt = gwt; break;
case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
default: nvUnreachable();
}
// weigh the components
err *= awt;
}
return err * err;
}

61
3rdparty/nvtt/bc7/avpcl_utils.h vendored Normal file
View File

@@ -0,0 +1,61 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// utility class holding common routines
#ifndef _AVPCL_UTILS_H
#define _AVPCL_UTILS_H
#include "nvmath/vector.h"
namespace AVPCL {
inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); }
static const int INDEXMODE_BITS = 1; // 2 different index modes
static const int NINDEXMODES = (1<<(INDEXMODE_BITS));
static const int INDEXMODE_ALPHA_IS_3BITS = 0;
static const int INDEXMODE_ALPHA_IS_2BITS = 1;
static const int ROTATEMODE_BITS = 2; // 4 different rotate modes
static const int NROTATEMODES = (1<<(ROTATEMODE_BITS));
static const int ROTATEMODE_RGBA_RGBA = 0;
static const int ROTATEMODE_RGBA_AGBR = 1;
static const int ROTATEMODE_RGBA_RABG = 2;
static const int ROTATEMODE_RGBA_RGAB = 3;
class Utils
{
public:
// error metrics
static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b);
static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode);
static float metric1(float a, float b, int rotatemode);
static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1);
static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1);
static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode);
static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode);
static float premult(float r, float a);
// quantization and unquantization
static int unquantize(int q, int prec);
static int quantize(float value, int prec);
// lerping
static int lerp(int a, int b, int i, int bias, int denom);
static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom);
};
}
#endif

76
3rdparty/nvtt/bc7/bits.h vendored Normal file
View File

@@ -0,0 +1,76 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_BITS_H
#define _AVPCL_BITS_H
// read/write a bitstream
#include "nvcore/debug.h"
namespace AVPCL {
class Bits
{
public:
Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
void write(int value, int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
for (int i=0; i<nbits; ++i)
writeone(value>>i);
}
int read(int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
int out = 0;
for (int i=0; i<nbits; ++i)
out |= readone() << i;
return out;
}
int getptr() { return bptr; }
void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; }
private:
int bptr; // next bit to read
int bend; // last written bit + 1
char *bits; // ptr to user bit stream
const char *cbits; // ptr to const user bit stream
int maxbits; // max size of user bit stream
char readonly; // 1 if this is a read-only stream
int readone() {
nvAssert (bptr < bend);
if (bptr >= bend) return 0;
int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
++bptr;
return bit != 0;
}
void writeone(int bit) {
nvAssert (!readonly); // "Writing a read-only bit stream"
nvAssert (bptr < maxbits);
if (bptr >= maxbits) return;
if (bit&1)
bits[bptr>>3] |= 1 << (bptr & 7);
else
bits[bptr>>3] &= ~(1 << (bptr & 7));
if (bptr++ >= bend) bend = bptr;
}
};
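// Editor's usage sketch (illustrative only, not part of the original source):
// round-tripping a value through the stream. Bits are LSB-first within each byte,
// matching the block layouts used by the codecs above.
//
//   char block[16] = {};
//   Bits w(block, 128);
//   w.write(0x2A, 6);                    // writes the low 6 bits of 0x2A
//   Bits r((const char*)block, 128);
//   int v = r.read(6);                   // v == 0x2A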
}
#endif

81
3rdparty/nvtt/bc7/endpts.h vendored Normal file
View File

@@ -0,0 +1,81 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_ENDPTS_H
#define _AVPCL_ENDPTS_H
// endpoint definitions and routines to search through endpoint space
#include "nvmath/vector.h"
namespace AVPCL {
static const int NCHANNELS_RGB = 3;
static const int NCHANNELS_RGBA = 4;
static const int CHANNEL_R = 0;
static const int CHANNEL_G = 1;
static const int CHANNEL_B = 2;
static const int CHANNEL_A = 3;
struct FltEndpts
{
nv::Vector4 A;
nv::Vector4 B;
};
struct IntEndptsRGB
{
int A[NCHANNELS_RGB];
int B[NCHANNELS_RGB];
};
struct IntEndptsRGB_1
{
int A[NCHANNELS_RGB];
int B[NCHANNELS_RGB];
int lsb; // shared lsb for A and B
};
struct IntEndptsRGB_2
{
int A[NCHANNELS_RGB];
int B[NCHANNELS_RGB];
int a_lsb; // lsb for A
int b_lsb; // lsb for B
};
struct IntEndptsRGBA
{
int A[NCHANNELS_RGBA];
int B[NCHANNELS_RGBA];
};
struct IntEndptsRGBA_2
{
int A[NCHANNELS_RGBA];
int B[NCHANNELS_RGBA];
int a_lsb; // lsb for A
int b_lsb; // lsb for B
};
struct IntEndptsRGBA_2a
{
int A[NCHANNELS_RGBA];
int B[NCHANNELS_RGBA];
int a_lsb; // lsb for RGB channels of A
int b_lsb; // lsb for RGB channels of B
};
}
#endif

132
3rdparty/nvtt/bc7/shapes_three.h vendored Normal file
View File

@@ -0,0 +1,132 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_SHAPES_THREE_H
#define _AVPCL_SHAPES_THREE_H
// shapes for 3 regions
#define NREGIONS 3
#define NSHAPES 64
#define SHAPEBITS 6
static int shapes[NSHAPES*16] =
{
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 0, 0, 2, 2,
0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 0, 0, 1, 1,
2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1,
1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1,
1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 2,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 2,
0, 1, 1, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1,
0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 1, 2, 2, 0, 0, 1,
0, 1, 1, 2, 0, 1, 2, 2, 1, 1, 2, 2, 2, 2, 0, 0,
0, 1, 1, 2, 0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2,
0, 1, 1, 2, 2, 0, 0, 1, 1, 1, 2, 2, 0, 0, 2, 2,
1, 1, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 1, 1, 1, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0,
0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0,
0, 1, 2, 2, 0, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 1, 0, 1, 1, 0,
0, 0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1,
0, 0, 0, 0, 2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 1,
0, 0, 2, 2, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
1, 1, 0, 2, 0, 1, 1, 0, 0, 1, 2, 2, 2, 0, 0, 0,
1, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 2, 2, 2, 1, 1,
0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 1,
0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 1, 1, 0, 1, 2, 0,
0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 1, 2, 0,
1, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 2, 0, 1, 2, 0,
1, 2, 2, 2, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 0,
0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 2, 2, 2, 0, 0,
2, 2, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 2,
0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2,
1, 1, 2, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 2, 2,
2, 2, 0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0, 0, 2, 2,
0, 0, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2,
0, 0, 2, 2, 0, 2, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1,
0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 1,
0, 0, 1, 1, 1, 2, 2, 1, 0, 1, 0, 1, 2, 1, 2, 1,
0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 2, 1, 1, 2,
2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 2, 1, 1, 2,
0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2,
0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0,
2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0,
2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2,
0, 0, 0, 2, 0, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 1,
0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1,
0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1,
0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,
};
#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
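// Editor's note (derived from the macro above): the table packs shapes four
// across -- (si)&3 picks the shape within a band of four, (si)>>2 picks the band
// (64 ints == four 16-int rows), and each listed row of 16 ints holds one
// scanline of four shapes. E.g. REGION(2,1,5) == shapes[1*4 + 1*64 + 2 + 1*16]
// == shapes[86].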
static int shapeindex_to_compressed_indices[NSHAPES*3] =
{
0, 3,15, 0, 3, 8, 0,15, 8, 0,15, 3,
0, 8,15, 0, 3,15, 0,15, 3, 0,15, 8,
0, 8,15, 0, 8,15, 0, 6,15, 0, 6,15,
0, 6,15, 0, 5,15, 0, 3,15, 0, 3, 8,
0, 3,15, 0, 3, 8, 0, 8,15, 0,15, 3,
0, 3,15, 0, 3, 8, 0, 6,15, 0,10, 8,
0, 5, 3, 0, 8,15, 0, 8, 6, 0, 6,10,
0, 8,15, 0, 5,15, 0,15,10, 0,15, 8,
0, 8,15, 0,15, 3, 0, 3,15, 0, 5,10,
0, 6,10, 0,10, 8, 0, 8, 9, 0,15,10,
0,15, 6, 0, 3,15, 0,15, 8, 0, 5,15,
0,15, 3, 0,15, 6, 0,15, 6, 0,15, 8,
0, 3,15, 0,15, 3, 0, 5,15, 0, 5,15,
0, 5,15, 0, 8,15, 0, 5,15, 0,10,15,
0, 5,15, 0,10,15, 0, 8,15, 0,13,15,
0,15, 3, 0,12,15, 0, 3,15, 0, 3, 8
};
#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*3+(region)]
#endif

133
3rdparty/nvtt/bc7/shapes_two.h vendored Normal file
View File

@@ -0,0 +1,133 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_SHAPES_TWO_H
#define _AVPCL_SHAPES_TWO_H
// shapes for two regions
#define NREGIONS 2
#define NSHAPES 64
#define SHAPEBITS 6
static int shapes[NSHAPES*16] =
{
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,
1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0,
1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
};
#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
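// Illustrative helper, not part of the original header: count how many of the
// 16 texels in a 4x4 tile fall into region 1 for a given shape index.
// A minimal sketch of how REGION() is meant to be indexed.
static inline int region_one_count(int si)
{
int n = 0;
for (int y = 0; y < 4; y++)
for (int x = 0; x < 4; x++)
n += REGION(x, y, si); // table entries are 0 or 1 for two regions
return n;
}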
static int shapeindex_to_compressed_indices[NSHAPES*2] =
{
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 8, 0, 2,
0, 2, 0, 8, 0, 8, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 8, 0, 8, 0, 2, 0, 2,
0,15, 0,15, 0, 6, 0, 8,
0, 2, 0, 8, 0,15, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 2, 0,15, 0,15, 0, 6,
0, 6, 0, 2, 0, 6, 0, 8,
0,15, 0,15, 0, 2, 0, 2,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 2, 0,15
};
#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)]
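// Usage sketch (illustrative, not in the original header): the first entry of
// each pair above is always 0 (region 0's anchor texel), while the second
// entry is the anchor texel of region 1 for that shape.
static inline void get_anchor_indices(int si, int & anchor0, int & anchor1)
{
anchor0 = SHAPEINDEX_TO_COMPRESSED_INDICES(si, 0); // always 0 in this table
anchor1 = SHAPEINDEX_TO_COMPRESSED_INDICES(si, 1);
}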
#endif

41
3rdparty/nvtt/bc7/tile.h vendored Normal file

@@ -0,0 +1,41 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_TILE_H
#define _AVPCL_TILE_H
#include "nvmath/vector.h"
#include <math.h>
#include "avpcl_utils.h"
namespace AVPCL {
// extract a tile of pixels from an array
class Tile
{
public:
static const int TILE_H = 4;
static const int TILE_W = 4;
static const int TILE_TOTAL = TILE_H * TILE_W;
nv::Vector4 data[TILE_H][TILE_W];
float importance_map[TILE_H][TILE_W];
int size_x, size_y; // actual size of tile
Tile() {}
~Tile() {}
Tile(int xs, int ys) { size_x = xs; size_y = ys; }
};
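// Usage sketch (illustrative, not part of the original file; assumes the
// four-component nv::Vector4 constructor): fill a full 4x4 tile with opaque
// white and uniform importance before handing it to the compressor.
inline void fillWhite(Tile & t)
{
t.size_x = Tile::TILE_W;
t.size_y = Tile::TILE_H;
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
{
t.data[y][x] = nv::Vector4(255.0f, 255.0f, 255.0f, 255.0f);
t.importance_map[y][x] = 1.0f;
}
}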
}
#endif

181
3rdparty/nvtt/nvcore/array.h vendored Normal file

@@ -0,0 +1,181 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_ARRAY_H
#define NV_CORE_ARRAY_H
/*
This array class requires the elements to be relocatable; it uses memmove and realloc. Ideally I should be
using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
are not supported.
Note also that push_back and resize do not support inserting elements that are already in the same
container. This is forbidden to prevent an extra copy.
*/
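// Illustrative hazard (not in the original header): because the buffer can be
// reallocated, pushing an element of the array into itself is forbidden:
//
//     nv::Array<int> a;
//     a.pushBack(1);
//     //a.pushBack(a[0]); // WRONG: a[0] may be invalidated by the realloc.
//     int tmp = a[0];     // Take a copy first,
//     a.pushBack(tmp);    // then push the copy.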
#include "memory.h"
#include "debug.h"
#include "foreach.h" // pseudoindex
namespace nv
{
class Stream;
/**
* Replacement for std::vector that is easier to debug and provides
* some nice foreach enumerators.
*/
template<typename T>
class NVCORE_CLASS Array {
public:
typedef uint size_type;
// Default constructor.
NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
// Copy constructor.
NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
copy(a.m_buffer, a.m_size);
}
// Constructor that initializes the vector with the given elements.
NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
copy(ptr, num);
}
// Allocate array.
NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
setArrayCapacity(capacity);
}
// Destructor.
NV_FORCEINLINE ~Array() {
clear();
free<T>(m_buffer);
}
/// Const element access.
NV_FORCEINLINE const T & operator[]( uint index ) const
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
NV_FORCEINLINE const T & at( uint index ) const
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
/// Element access.
NV_FORCEINLINE T & operator[] ( uint index )
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
NV_FORCEINLINE T & at( uint index )
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
/// Get vector size.
NV_FORCEINLINE uint size() const { return m_size; }
/// Get vector size.
NV_FORCEINLINE uint count() const { return m_size; }
/// Get vector capacity.
NV_FORCEINLINE uint capacity() const { return m_capacity; }
/// Get const vector pointer.
NV_FORCEINLINE const T * buffer() const { return m_buffer; }
/// Get vector pointer.
NV_FORCEINLINE T * buffer() { return m_buffer; }
/// Provide begin/end pointers for C++11 range-based for loops.
NV_FORCEINLINE T * begin() { return m_buffer; }
NV_FORCEINLINE T * end() { return m_buffer + m_size; }
NV_FORCEINLINE const T * begin() const { return m_buffer; }
NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
/// Is vector empty.
NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
/// Is a null vector.
NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
T & append();
void push_back( const T & val );
void pushBack( const T & val );
Array<T> & append( const T & val );
Array<T> & operator<< ( T & t );
void pop_back();
void popBack(uint count = 1);
void popFront(uint count = 1);
const T & back() const;
T & back();
const T & front() const;
T & front();
bool contains(const T & e) const;
bool find(const T & element, uint * indexPtr) const;
bool find(const T & element, uint begin, uint end, uint * indexPtr) const;
void removeAt(uint index);
bool remove(const T & element);
void insertAt(uint index, const T & val = T());
void append(const Array<T> & other);
void append(const T other[], uint count);
void replaceWithLast(uint index);
void resize(uint new_size);
void resize(uint new_size, const T & elem);
void fill(const T & elem);
void clear();
void shrink();
void reserve(uint desired_size);
void copy(const T * data, uint count);
Array<T> & operator=( const Array<T> & a );
T * release();
// Array enumerator.
typedef uint PseudoIndex;
NV_FORCEINLINE PseudoIndex start() const { return 0; }
NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
#if NV_CC_MSVC
NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
return m_buffer[i(this)];
}
NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
return m_buffer[i(this)];
}
#endif
// Friends.
template <typename Typ>
friend Stream & operator<< ( Stream & s, Array<Typ> & p );
template <typename Typ>
friend void swap(Array<Typ> & a, Array<Typ> & b);
protected:
void setArraySize(uint new_size);
void setArrayCapacity(uint new_capacity);
T * m_buffer;
uint m_capacity;
uint m_size;
};
} // nv namespace
#endif // NV_CORE_ARRAY_H

437
3rdparty/nvtt/nvcore/array.inl vendored Normal file

@@ -0,0 +1,437 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_ARRAY_INL
#define NV_CORE_ARRAY_INL
#include "array.h"
#include "stream.h"
#include "utils.h" // swap
#include <string.h> // memmove
#include <new> // for placement new
namespace nv
{
template <typename T>
NV_FORCEINLINE T & Array<T>::append()
{
uint old_size = m_size;
uint new_size = m_size + 1;
setArraySize(new_size);
construct_range(m_buffer, new_size, old_size);
return m_buffer[old_size]; // Return reference to last element.
}
// Push an element at the end of the vector.
template <typename T>
NV_FORCEINLINE void Array<T>::push_back( const T & val )
{
#if 1
nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size);
uint old_size = m_size;
uint new_size = m_size + 1;
setArraySize(new_size);
construct_range(m_buffer, new_size, old_size, val);
#else
uint new_size = m_size + 1;
if (new_size > m_capacity)
{
// @@ Is there any way to avoid this copy?
// @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?
// @@ Assert instead of copy?
const T copy(val); // create a copy in case value is inside of this array.
setArraySize(new_size);
new (m_buffer+new_size-1) T(copy);
}
else
{
m_size = new_size;
new(m_buffer+new_size-1) T(val);
}
#endif // 0/1
}
template <typename T>
NV_FORCEINLINE void Array<T>::pushBack( const T & val )
{
push_back(val);
}
template <typename T>
NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )
{
push_back(val);
return *this;
}
// Qt like push operator.
template <typename T>
NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )
{
push_back(t);
return *this;
}
// Pop the element at the end of the vector.
template <typename T>
NV_FORCEINLINE void Array<T>::pop_back()
{
nvDebugCheck( m_size > 0 );
resize( m_size - 1 );
}
template <typename T>
NV_FORCEINLINE void Array<T>::popBack(uint count)
{
nvDebugCheck(m_size >= count);
resize(m_size - count);
}
template <typename T>
NV_FORCEINLINE void Array<T>::popFront(uint count)
{
nvDebugCheck(m_size >= count);
//resize(m_size - count);
if (m_size == count) {
clear();
}
else {
destroy_range(m_buffer, 0, count);
memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count));
m_size -= count;
}
}
// Get back element.
template <typename T>
NV_FORCEINLINE const T & Array<T>::back() const
{
nvDebugCheck( m_size > 0 );
return m_buffer[m_size-1];
}
// Get back element.
template <typename T>
NV_FORCEINLINE T & Array<T>::back()
{
nvDebugCheck( m_size > 0 );
return m_buffer[m_size-1];
}
// Get front element.
template <typename T>
NV_FORCEINLINE const T & Array<T>::front() const
{
nvDebugCheck( m_size > 0 );
return m_buffer[0];
}
// Get front element.
template <typename T>
NV_FORCEINLINE T & Array<T>::front()
{
nvDebugCheck( m_size > 0 );
return m_buffer[0];
}
// Check if the given element is contained in the array.
template <typename T>
NV_FORCEINLINE bool Array<T>::contains(const T & e) const
{
return find(e, NULL);
}
// Return true if element found.
template <typename T>
NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const
{
return find(element, 0, m_size, indexPtr);
}
// Return true if element found within the given range.
template <typename T>
NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const
{
return ::nv::find(element, m_buffer, begin, end, indexPtr);
}
// Remove the element at the given index. This is an expensive operation!
template <typename T>
void Array<T>::removeAt(uint index)
{
nvDebugCheck(index < m_size);
if (m_size == 1) {
clear();
}
else {
m_buffer[index].~T();
memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index));
m_size--;
}
}
// Remove the first instance of the given element.
template <typename T>
bool Array<T>::remove(const T & element)
{
uint index;
if (find(element, &index)) {
removeAt(index);
return true;
}
return false;
}
// Insert the given element at the given index shifting all the elements up.
template <typename T>
void Array<T>::insertAt(uint index, const T & val/*=T()*/)
{
nvDebugCheck( index <= m_size );
setArraySize(m_size + 1);
if (index < m_size - 1) {
memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index));
}
// Copy-construct into the newly opened slot.
new(m_buffer+index) T(val);
}
// Append the given data to our vector.
template <typename T>
NV_FORCEINLINE void Array<T>::append(const Array<T> & other)
{
append(other.m_buffer, other.m_size);
}
// Append the given data to our vector.
template <typename T>
void Array<T>::append(const T other[], uint count)
{
if (count > 0) {
const uint old_size = m_size;
setArraySize(m_size + count);
for (uint i = 0; i < count; i++ ) {
new(m_buffer + old_size + i) T(other[i]);
}
}
}
// Remove the given element by replacing it with the last one.
template <typename T>
void Array<T>::replaceWithLast(uint index)
{
nvDebugCheck( index < m_size );
nv::swap(m_buffer[index], back()); // @@ Is this OK when index == size-1?
(m_buffer+m_size-1)->~T();
m_size--;
}
// Resize the vector preserving existing elements.
template <typename T>
void Array<T>::resize(uint new_size)
{
uint old_size = m_size;
// Destruct old elements (if we're shrinking).
destroy_range(m_buffer, new_size, old_size);
setArraySize(new_size);
// Call default constructors
construct_range(m_buffer, new_size, old_size);
}
// Resize the vector preserving existing elements and initializing the
// new ones with the given value.
template <typename T>
void Array<T>::resize(uint new_size, const T & elem)
{
nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size);
uint old_size = m_size;
// Destruct old elements (if we're shrinking).
destroy_range(m_buffer, new_size, old_size);
setArraySize(new_size);
// Call copy constructors
construct_range(m_buffer, new_size, old_size, elem);
}
// Fill array with the given value.
template <typename T>
void Array<T>::fill(const T & elem)
{
fill(m_buffer, m_size, elem);
}
// Clear the buffer.
template <typename T>
NV_FORCEINLINE void Array<T>::clear()
{
nvDebugCheck(isValidPtr(m_buffer));
// Destruct old elements
destroy_range(m_buffer, 0, m_size);
m_size = 0;
}
// Shrink the allocated vector.
template <typename T>
NV_FORCEINLINE void Array<T>::shrink()
{
if (m_size < m_capacity) {
setArrayCapacity(m_size);
}
}
// Preallocate space.
template <typename T>
NV_FORCEINLINE void Array<T>::reserve(uint desired_size)
{
if (desired_size > m_capacity) {
setArrayCapacity(desired_size);
}
}
// Copy elements to this array. Resizes it if needed.
template <typename T>
NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
{
#if 1 // Simpler, but maybe not as efficient?
destroy_range(m_buffer, 0, m_size);
setArraySize(count);
construct_range(m_buffer, count, 0, data);
#else
const uint old_size = m_size;
destroy_range(m_buffer, count, old_size);
setArraySize(count);
copy_range(m_buffer, data, old_size);
construct_range(m_buffer, count, old_size, data);
#endif
}
// Assignment operator.
template <typename T>
NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )
{
copy(a.m_buffer, a.m_size);
return *this;
}
// Release ownership of allocated memory and returns pointer to it.
template <typename T>
T * Array<T>::release() {
T * tmp = m_buffer;
m_buffer = NULL;
m_capacity = 0;
m_size = 0;
return tmp;
}
// Change array size.
template <typename T>
inline void Array<T>::setArraySize(uint new_size) {
m_size = new_size;
if (new_size > m_capacity) {
uint new_buffer_size;
if (m_capacity == 0) {
// first allocation is exact
new_buffer_size = new_size;
}
else {
// following allocations grow array by 25%
new_buffer_size = new_size + (new_size >> 2);
}
setArrayCapacity( new_buffer_size );
}
}
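// Growth sketch (illustrative): starting from an empty array, repeated
// pushBack calls produce capacities 1, 2, 3, 5, 7, 10, 13, 17, ...: the first
// allocation is exact, later ones reserve new_size + new_size/4 (25% headroom).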
// Change array capacity.
template <typename T>
inline void Array<T>::setArrayCapacity(uint new_capacity) {
nvDebugCheck(new_capacity >= m_size);
if (new_capacity == 0) {
// free the buffer.
if (m_buffer != NULL) {
free<T>(m_buffer);
m_buffer = NULL;
}
}
else {
// realloc the buffer
m_buffer = realloc<T>(m_buffer, new_capacity);
}
m_capacity = new_capacity;
}
// Array serialization.
template <typename Typ>
inline Stream & operator<< ( Stream & s, Array<Typ> & p )
{
if (s.isLoading()) {
uint size;
s << size;
p.resize( size );
}
else {
s << p.m_size;
}
for (uint i = 0; i < p.m_size; i++) {
s << p.m_buffer[i];
}
return s;
}
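// Usage sketch (illustrative, assuming the stream classes from
// nvcore/stdstream.h): the same operator saves or loads depending on the
// stream direction.
//
//     nv::Array<uint32> values;
//     values.pushBack(42);
//     StdOutputStream out("values.bin");
//     out << values;   // writes the size, then each element
//     StdInputStream in("values.bin");
//     in << values;    // resizes the array, then reads elements back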
// Swap the members of the two given vectors.
template <typename Typ>
inline void swap(Array<Typ> & a, Array<Typ> & b)
{
nv::swap(a.m_buffer, b.m_buffer);
nv::swap(a.m_capacity, b.m_capacity);
nv::swap(a.m_size, b.m_size);
}
} // nv namespace
#endif // NV_CORE_ARRAY_INL

216
3rdparty/nvtt/nvcore/debug.h vendored Normal file

@@ -0,0 +1,216 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_DEBUG_H
#define NV_CORE_DEBUG_H
#include "nvcore.h"
#include <stdarg.h> // va_list
// Make sure we are using our assert.
#undef assert
#define NV_ABORT_DEBUG 1
#define NV_ABORT_IGNORE 2
#define NV_ABORT_EXIT 3
#define nvNoAssert(exp) \
NV_MULTI_LINE_MACRO_BEGIN \
(void)sizeof(exp); \
NV_MULTI_LINE_MACRO_END
#if NV_NO_ASSERT
# define nvAssert(exp) nvNoAssert(exp)
# define nvCheck(exp) nvNoAssert(exp)
# define nvDebugAssert(exp) nvNoAssert(exp)
# define nvDebugCheck(exp) nvNoAssert(exp)
# define nvDebugBreak() nvNoAssert(0)
#else // NV_NO_ASSERT
# if NV_CC_MSVC
// @@ Does this work in msvc-6 and earlier?
# define nvDebugBreak() __debugbreak()
//# define nvDebugBreak() __asm { int 3 }
# elif NV_OS_ORBIS
# define nvDebugBreak() __debugbreak()
# elif NV_CC_GNUC
# define nvDebugBreak() __builtin_trap()
# else
# error "No nvDebugBreak()!"
# endif
/*
# elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
// @@ Use __builtin_trap() on GCC
# define nvDebugBreak() __asm__ volatile ("trap")
# elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
# define nvDebugBreak() __asm__ volatile ("int3")
# elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
# define nvDebugBreak() __asm__ ( "int %0" : :"I"(3) )
# else
# include <signal.h>
# define nvDebugBreak() raise(SIGTRAP)
# endif
*/
#define nvDebugBreakOnce() \
NV_MULTI_LINE_MACRO_BEGIN \
static bool firstTime = true; \
if (firstTime) { firstTime = false; nvDebugBreak(); } \
NV_MULTI_LINE_MACRO_END
#define nvAssertMacro(exp) \
NV_MULTI_LINE_MACRO_BEGIN \
if (!(exp)) { \
if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
nvDebugBreak(); \
} \
} \
NV_MULTI_LINE_MACRO_END
// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
#define nvAssertMacroWithIgnoreAll(exp,...) \
NV_MULTI_LINE_MACRO_BEGIN \
static bool ignoreAll = false; \
if (!ignoreAll && !(exp)) { \
int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
if (result == NV_ABORT_DEBUG) { \
nvDebugBreak(); \
} else if (result == NV_ABORT_IGNORE) { \
ignoreAll = true; \
} \
} \
NV_MULTI_LINE_MACRO_END
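// Usage sketch (illustrative): the variadic form lets a failing assert print
// context, and answering "ignore" (NV_ABORT_IGNORE) silences further hits of
// the same call site:
//
//     nvAssertMacroWithIgnoreAll(index < count, "index=%d count=%d", index, count);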
// Interesting assert macro from Insomniac:
// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
// Used as follows:
// if (nvCheck(i < count)) {
// normal path
// } else {
// fixup code.
// }
// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
#define nvCheckMacro(exp) \
(\
(exp) ? true : ( \
(nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
) \
)
#define nvAssert(exp) nvAssertMacro(exp)
#define nvCheck(exp) nvAssertMacro(exp)
#if defined(_DEBUG)
# define nvDebugAssert(exp) nvAssertMacro(exp)
# define nvDebugCheck(exp) nvAssertMacro(exp)
#else // _DEBUG
# define nvDebugAssert(exp) nvNoAssert(exp)
# define nvDebugCheck(exp) nvNoAssert(exp)
#endif // _DEBUG
#endif // NV_NO_ASSERT
// Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
/*#if !defined(_DEBUG)
# if NV_CC_MSVC
# define nvAssume(exp) __assume(exp)
# else
# define nvAssume(exp) nvCheck(exp)
# endif
#else
# define nvAssume(exp) nvCheck(exp)
#endif*/
#if defined(_DEBUG)
# if NV_CC_MSVC
# define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0)
# else
# define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable()
# endif
#else
# if NV_CC_MSVC
# define nvUnreachable() __assume(0)
# else
# define nvUnreachable() __builtin_unreachable()
# endif
#endif
#define nvError(x) nvAbort(x, __FILE__, __LINE__, __FUNC__)
#define nvWarning(x) nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
#ifndef NV_DEBUG_PRINT
#define NV_DEBUG_PRINT 1 //defined(_DEBUG)
#endif
#if NV_DEBUG_PRINT
#define nvDebug(...) nvDebugPrint(__VA_ARGS__)
#else
#if NV_CC_MSVC
#define nvDebug(...) __noop(__VA_ARGS__)
#else
#define nvDebug(...) ((void)0) // Non-msvc platforms do not evaluate arguments?
#endif
#endif
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
namespace nv
{
inline bool isValidPtr(const void * ptr) {
#if NV_CPU_X86_64
if (ptr == NULL) return true;
if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
if (reinterpret_cast<uint64>(ptr) >= 0x000007FFFFFEFFFFULL) return false;
#else
if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
#endif
return true;
}
// Message handler interface.
struct MessageHandler {
virtual void log(const char * str, va_list arg) = 0;
virtual ~MessageHandler() {}
};
// Assert handler interface.
struct AssertHandler {
virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
virtual ~AssertHandler() {}
};
namespace debug
{
NVCORE_API void dumpInfo();
NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();
NVCORE_API void setAssertHandler( AssertHandler * assertHandler );
NVCORE_API void resetAssertHandler();
NVCORE_API void enableSigHandler(bool interactive);
NVCORE_API void disableSigHandler();
NVCORE_API bool isDebuggerPresent();
NVCORE_API bool attachToDebugger();
NVCORE_API void terminate(int code);
}
} // nv namespace
#endif // NV_CORE_DEBUG_H

57
3rdparty/nvtt/nvcore/defsgnucdarwin.h vendored Normal file

@@ -0,0 +1,57 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
#include <stddef.h> // operator new, size_t, NULL
#ifndef __STDC_VERSION__
# define __STDC_VERSION__ 0
#endif // __STDC_VERSION__
// Function linkage
#define DLL_IMPORT
#if __GNUC__ >= 4
# define DLL_EXPORT __attribute__((visibility("default")))
# define DLL_EXPORT_CLASS DLL_EXPORT
#else
# define DLL_EXPORT
# define DLL_EXPORT_CLASS
#endif
// Function calling modes
#if NV_CPU_X86
# define NV_CDECL __attribute__((cdecl))
# define NV_STDCALL __attribute__((stdcall))
#else
# define NV_CDECL
# define NV_STDCALL
#endif
#define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE inline
#define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif
#define NV_NOINLINE __attribute__((noinline))
// Define __FUNC__ properly.
#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
# if __GNUC__ >= 2
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
# else
# define __FUNC__ "<unknown>"
# endif
#else
# define __FUNC__ __PRETTY_FUNCTION__
#endif
#define restrict __restrict__

63
3rdparty/nvtt/nvcore/defsgnuclinux.h vendored Normal file

@@ -0,0 +1,63 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
#include <stddef.h> // operator new, size_t, NULL
#ifndef __STDC_VERSION__
# define __STDC_VERSION__ 0
#endif
// Function linkage
#define DLL_IMPORT
#if __GNUC__ >= 4
# define DLL_EXPORT __attribute__((visibility("default")))
# define DLL_EXPORT_CLASS DLL_EXPORT
#else
# define DLL_EXPORT
# define DLL_EXPORT_CLASS
#endif
// Function calling modes
#if NV_CPU_X86
# define NV_CDECL __attribute__((cdecl))
# define NV_STDCALL __attribute__((stdcall))
#else
# define NV_CDECL
# define NV_STDCALL
#endif
#define NV_FASTCALL __attribute__((fastcall))
//#if __GNUC__ > 3
// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
#define NV_FORCEINLINE inline
//#else
// Some compilers complain that inline and always_inline are redundant.
//#define NV_FORCEINLINE __attribute__((always_inline))
//#endif
#define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL __thread
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif
#define NV_NOINLINE __attribute__((noinline))
// Define __FUNC__ properly.
#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
# if __GNUC__ >= 2
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
# else
# define __FUNC__ "<unknown>"
# endif
#else
# define __FUNC__ __PRETTY_FUNCTION__
#endif
#define restrict __restrict__

65
3rdparty/nvtt/nvcore/defsgnucwin32.h vendored Normal file

@@ -0,0 +1,65 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
//#include <cstddef> // size_t, NULL
// Function linkage
#define DLL_IMPORT __declspec(dllimport)
#define DLL_EXPORT __declspec(dllexport)
#define DLL_EXPORT_CLASS DLL_EXPORT
// Function calling modes
#if NV_CPU_X86
# define NV_CDECL __attribute__((cdecl))
# define NV_STDCALL __attribute__((stdcall))
#else
# define NV_CDECL
# define NV_STDCALL
#endif
#define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE inline
#define NV_DEPRECATED __attribute__((deprecated))
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif
#define NV_NOINLINE __attribute__((noinline))
// Define __FUNC__ properly.
#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
# if __GNUC__ >= 2
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
# else
# define __FUNC__ "<unknown>"
# endif
#else
# define __FUNC__ __PRETTY_FUNCTION__
#endif
#define restrict __restrict__
/*
// Type definitions
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long uint64;
typedef signed long long int64;
// Aliases
typedef uint32 uint;
*/

94
3rdparty/nvtt/nvcore/defsvcwin32.h vendored Normal file

@@ -0,0 +1,94 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
// Function linkage
#define DLL_IMPORT __declspec(dllimport)
#define DLL_EXPORT __declspec(dllexport)
#define DLL_EXPORT_CLASS DLL_EXPORT
// Function calling modes
#define NV_CDECL __cdecl
#define NV_STDCALL __stdcall
#define NV_FASTCALL __fastcall
#define NV_DEPRECATED
#define NV_PURE
#define NV_CONST
// Set standard function names.
#if _MSC_VER < 1900
# define snprintf _snprintf
#endif
#if _MSC_VER < 1500
# define vsnprintf _vsnprintf
#endif
#if _MSC_VER < 1700
# define strtoll _strtoi64
# define strtoull _strtoui64
#endif
#define chdir _chdir
#define getcwd _getcwd
#if _MSC_VER < 1800 // Not sure what version introduced this.
#define va_copy(a, b) (a) = (b)
#endif
#if !defined restrict
#define restrict
#endif
// Ignore gcc attributes.
#define __attribute__(X)
#if !defined __FUNC__
#define __FUNC__ __FUNCTION__
#endif
#define NV_NOINLINE __declspec(noinline)
#define NV_FORCEINLINE inline
#define NV_THREAD_LOCAL __declspec(thread)
/*
// Type definitions
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned __int64 uint64;
typedef signed __int64 int64;
// Aliases
typedef uint32 uint;
*/
// Unwanted VC++ warnings to disable.
/*
#pragma warning(disable : 4244) // conversion to float, possible loss of data
#pragma warning(disable : 4245) // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4710) // inline function not expanded
#pragma warning(disable : 4127) // Conditional expression is constant
#pragma warning(disable : 4305) // truncation from 'const double' to 'float'
#pragma warning(disable : 4505) // unreferenced local function has been removed
#pragma warning(disable : 4702) // unreachable code in inline expanded function
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup
*/
#pragma warning(1 : 4705) // Report unused local variables.
#pragma warning(1 : 4555) // Expression has no effect.

68
3rdparty/nvtt/nvcore/foreach.h vendored Normal file

@@ -0,0 +1,68 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
#ifndef NV_CORE_FOREACH_H
#define NV_CORE_FOREACH_H
/*
These foreach macros are very non-standard and somewhat confusing, but I like them.
*/
#include "nvcore.h"
#if NV_CC_GNUC // If typeof or decltype is available:
#if !NV_CC_CPP11
# define NV_DECLTYPE typeof // Use the non-standard typeof extension, which behaves like C++11 decltype.
#else
# define NV_DECLTYPE decltype
#endif
/*
Ideally we would like to write this:
#define NV_FOREACH(i, container) \
for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
But gcc versions prior to 4.7 required an intermediate type. See:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
*/
#define NV_FOREACH(i, container) \
typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
#else // If typeof not available:
#include <new> // placement new
struct PseudoIndexWrapper {
template <typename T>
PseudoIndexWrapper(const T & container) {
nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
new (memory) typename T::PseudoIndex(container.start());
}
// PseudoIndex cannot have a dtor!
template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
return *reinterpret_cast<typename T::PseudoIndex *>(memory);
}
template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
}
uint8 memory[4]; // Increase the size if we have bigger enumerators.
};
#define NV_FOREACH(i, container) \
for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
#endif
// Declare foreach keyword.
#if !defined NV_NO_USE_KEYWORDS
# define foreach NV_FOREACH
# define foreach_index NV_FOREACH
#endif
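// Usage sketch (illustrative, assuming nv::Array from nvcore/array.h):
//
//     nv::Array<int> values;
//     values.pushBack(1);
//     values.pushBack(2);
//     foreach(i, values) {
//         nvDebug("%d\n", values[i]);
//     }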
#endif // NV_CORE_FOREACH_H

83
3rdparty/nvtt/nvcore/hash.h vendored Normal file

@@ -0,0 +1,83 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
#ifndef NV_CORE_HASH_H
#define NV_CORE_HASH_H
#include "nvcore.h"
namespace nv
{
inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
{
const uint8 * data = (const uint8 *) data_in;
uint i = 0;
while (i < size) {
h = (h << 16) + (h << 6) - h + (uint) data[i++];
}
return h;
}
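// Usage sketch (illustrative): the h parameter chains calls, so a compound
// key can be hashed incrementally:
//
//     uint h = sdbmHash(&first, sizeof(first));
//     h = sdbmHash(&second, sizeof(second), h); // hash of (first, second)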
// Note that this hash does not handle NaN properly.
inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
{
for (uint i = 0; i < count; i++) {
//nvDebugCheck(nv::isFinite(*f));
union { float f; uint32 i; } x = { f[i] };
if (x.i == 0x80000000) x.i = 0;
h = sdbmHash(&x, 4, h);
}
return h;
}
template <typename T>
inline uint hash(const T & t, uint h = 5381)
{
return sdbmHash(&t, sizeof(T), h);
}
template <>
inline uint hash(const float & f, uint h)
{
return sdbmFloatHash(&f, 1, h);
}
// Functors for hash table:
template <typename Key> struct Hash
{
uint operator()(const Key & k) const {
return hash(k);
}
};
template <typename Key> struct Equal
{
bool operator()(const Key & k0, const Key & k1) const {
return k0 == k1;
}
};
// @@ Move to Utils.h?
template <typename T1, typename T2>
struct Pair {
T1 first;
T2 second;
};
template <typename T1, typename T2>
bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {
return p0.first == p1.first && p0.second == p1.second;
}
template <typename T1, typename T2>
uint hash(const Pair<T1,T2> & p, uint h = 5381) {
return hash(p.second, hash(p.first, h));
}
} // nv namespace
#endif // NV_CORE_HASH_H

30
3rdparty/nvtt/nvcore/memory.h vendored Normal file

@@ -0,0 +1,30 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_MEMORY_H
#define NV_CORE_MEMORY_H
#include "nvcore.h"
#include <stdlib.h> // malloc, realloc, free
#include <string.h> // memset
namespace nv {
// C++ helpers.
template <typename T> inline T * malloc(size_t count) {
return (T *)::malloc(sizeof(T) * count);
}
template <typename T> inline T * realloc(T * ptr, size_t count) {
return (T *)::realloc(ptr, sizeof(T) * count);
}
template <typename T> inline void free(const T * ptr) {
::free((void *)ptr);
}
template <typename T> inline void zero(T & data) {
memset(&data, 0, sizeof(T));
}
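// Usage sketch (illustrative): the typed wrappers keep the element count
// explicit and avoid a cast at each call site.
//
//     int * buffer = nv::malloc<int>(16);    // room for 16 ints, not 16 bytes
//     buffer = nv::realloc<int>(buffer, 32);
//     nv::free(buffer);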
} // nv namespace
#endif // NV_CORE_MEMORY_H

363
3rdparty/nvtt/nvcore/nvcore.h vendored Normal file

@@ -0,0 +1,363 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_H
#define NV_CORE_H
#define NVCORE_SHARED 0
#define NV_NO_ASSERT 0
// Function linkage
#if NVCORE_SHARED
#ifdef NVCORE_EXPORTS
#define NVCORE_API DLL_EXPORT
#define NVCORE_CLASS DLL_EXPORT_CLASS
#else
#define NVCORE_API DLL_IMPORT
#define NVCORE_CLASS DLL_IMPORT
#endif
#else // NVCORE_SHARED
#define NVCORE_API
#define NVCORE_CLASS
#endif // NVCORE_SHARED
// Platform definitions
#include "posh.h"
#define NV_OS_STRING POSH_OS_STRING
#if defined POSH_OS_LINUX
# define NV_OS_LINUX 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_ORBIS
# define NV_OS_ORBIS 1
#elif defined POSH_OS_FREEBSD
# define NV_OS_FREEBSD 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_OPENBSD
# define NV_OS_OPENBSD 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_CYGWIN32
# define NV_OS_CYGWIN 1
#elif defined POSH_OS_MINGW
# define NV_OS_MINGW 1
# define NV_OS_WIN32 1
#elif defined POSH_OS_OSX
# define NV_OS_DARWIN 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_IOS
# define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
# define NV_OS_UNIX 1
# define NV_OS_IOS 1
#elif defined POSH_OS_UNIX
# define NV_OS_UNIX 1
#elif defined POSH_OS_WIN64
# define NV_OS_WIN32 1
# define NV_OS_WIN64 1
#elif defined POSH_OS_WIN32
# define NV_OS_WIN32 1
#elif defined POSH_OS_XBOX
# define NV_OS_XBOX 1
#else
# error "Unsupported OS"
#endif
#ifndef NV_OS_WIN32
# define NV_OS_WIN32 0
#endif // NV_OS_WIN32
#ifndef NV_OS_WIN64
# define NV_OS_WIN64 0
#endif // NV_OS_WIN64
#ifndef NV_OS_MINGW
# define NV_OS_MINGW 0
#endif // NV_OS_MINGW
#ifndef NV_OS_CYGWIN
# define NV_OS_CYGWIN 0
#endif // NV_OS_CYGWIN
#ifndef NV_OS_LINUX
# define NV_OS_LINUX 0
#endif // NV_OS_LINUX
#ifndef NV_OS_FREEBSD
# define NV_OS_FREEBSD 0
#endif // NV_OS_FREEBSD
#ifndef NV_OS_OPENBSD
# define NV_OS_OPENBSD 0
#endif // NV_OS_OPENBSD
#ifndef NV_OS_UNIX
# define NV_OS_UNIX 0
#endif // NV_OS_UNIX
#ifndef NV_OS_DARWIN
# define NV_OS_DARWIN 0
#endif // NV_OS_DARWIN
#ifndef NV_OS_XBOX
# define NV_OS_XBOX 0
#endif // NV_OS_XBOX
#ifndef NV_OS_ORBIS
# define NV_OS_ORBIS 0
#endif // NV_OS_ORBIS
#ifndef NV_OS_IOS
# define NV_OS_IOS 0
#endif // NV_OS_IOS
// Threading:
// some platforms don't implement __thread or similar for thread-local-storage
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
# define NV_OS_USE_PTHREAD 1
# if NV_OS_DARWIN || NV_OS_IOS
# define NV_OS_HAS_TLS_QUALIFIER 0
# else
# define NV_OS_HAS_TLS_QUALIFIER 1
# endif
#else
# define NV_OS_USE_PTHREAD 0
# define NV_OS_HAS_TLS_QUALIFIER 1
#endif
// CPUs:
#define NV_CPU_STRING POSH_CPU_STRING
#if defined POSH_CPU_X86_64
//# define NV_CPU_X86 1
# define NV_CPU_X86_64 1
#elif defined POSH_CPU_X86
# define NV_CPU_X86 1
#elif defined POSH_CPU_PPC
# define NV_CPU_PPC 1
#elif defined POSH_CPU_STRONGARM
# define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64
# define NV_CPU_AARCH64 1
#else
# error "Unsupported CPU"
#endif
#ifndef NV_CPU_X86
# define NV_CPU_X86 0
#endif // NV_CPU_X86
#ifndef NV_CPU_X86_64
# define NV_CPU_X86_64 0
#endif // NV_CPU_X86_64
#ifndef NV_CPU_PPC
# define NV_CPU_PPC 0
#endif // NV_CPU_PPC
#ifndef NV_CPU_ARM
# define NV_CPU_ARM 0
#endif // NV_CPU_ARM
#ifndef NV_CPU_AARCH64
# define NV_CPU_AARCH64 0
#endif // NV_CPU_AARCH64
// Compiler:
#if defined POSH_COMPILER_CLANG
# define NV_CC_CLANG 1
# define NV_CC_GNUC 1 // Clang is compatible with GCC.
# define NV_CC_STRING "clang"
# pragma clang diagnostic ignored "-Wmissing-braces"
# pragma clang diagnostic ignored "-Wshadow"
# pragma clang diagnostic ignored "-Wunused-local-typedef"
# pragma clang diagnostic ignored "-Wunused-function"
# pragma clang diagnostic ignored "-Wunused-variable"
# pragma clang diagnostic ignored "-Wunused-parameter"
# pragma clang diagnostic ignored "-Wsometimes-uninitialized"
#elif defined POSH_COMPILER_GCC
# define NV_CC_GNUC 1
# define NV_CC_STRING "gcc"
# pragma GCC diagnostic ignored "-Wshadow"
# pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
# pragma GCC diagnostic ignored "-Wunused-function"
# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
# pragma GCC diagnostic ignored "-Wunused-variable"
# pragma GCC diagnostic ignored "-Wunused-parameter"
# pragma GCC diagnostic ignored "-Warray-bounds"
#elif defined POSH_COMPILER_MSVC
# define NV_CC_MSVC 1
# define NV_CC_STRING "msvc"
#else
# error "Unsupported compiler"
#endif
#ifndef NV_CC_GNUC
# define NV_CC_GNUC 0
#endif // NV_CC_GNUC
#ifndef NV_CC_MSVC
# define NV_CC_MSVC 0
#endif // NV_CC_MSVC
#ifndef NV_CC_CLANG
# define NV_CC_CLANG 0
#endif // NV_CC_CLANG
#if NV_CC_MSVC
#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
#else
// @@ IC: This works in Clang; what about GCC?
// @@ ES: It doesn't work in gcc. These 3 features are available in GCC >= 4.4.
#ifdef __clang__
#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
#elif defined __GNUC__
#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
#endif
#endif
// Endianness:
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
#define NV_BIG_ENDIAN POSH_BIG_ENDIAN
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING
// Type definitions:
typedef posh_u8_t uint8;
typedef posh_i8_t int8;
typedef posh_u16_t uint16;
typedef posh_i16_t int16;
typedef posh_u32_t uint32;
typedef posh_i32_t int32;
typedef posh_u64_t uint64;
typedef posh_i64_t int64;
// Aliases
typedef uint32 uint;
// Version string:
#define NV_VERSION_STRING \
NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
// Disable copy constructor and assignment operator.
#if NV_CC_CPP11
#define NV_FORBID_COPY(C) \
C( const C & ) = delete; \
C &operator=( const C & ) = delete
#else
#define NV_FORBID_COPY(C) \
private: \
C( const C & ); \
C &operator=( const C & )
#endif
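// Usage sketch (illustrative): place the macro inside the class definition;
// in the pre-C++11 branch it leaves the access level private, so follow it
// with an explicit access specifier.
//
//     class Renderer {
//         NV_FORBID_COPY(Renderer);
//     public:
//         Renderer() {}
//     };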
// Disable dynamic allocation on the heap.
// See Prohibiting Heap-Based Objects in More Effective C++.
#define NV_FORBID_HEAPALLOC() \
private: \
void *operator new(size_t size); \
void *operator new[](size_t size)
// String concatenation macros.
#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
#define NV_STRING2(x) #x
#define NV_STRING(x) NV_STRING2(x)
#if NV_CC_MSVC
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_END \
__pragma(warning(push)) \
__pragma(warning(disable:4127)) \
} while(false) \
__pragma(warning(pop))
#else
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_END } while(false)
#endif
#if NV_CC_CPP11
#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
#else
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
#endif
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
// Make sure type definitions are fine.
NV_COMPILER_CHECK(sizeof(int8) == 1);
NV_COMPILER_CHECK(sizeof(uint8) == 1);
NV_COMPILER_CHECK(sizeof(int16) == 2);
NV_COMPILER_CHECK(sizeof(uint16) == 2);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int64) == 8);
NV_COMPILER_CHECK(sizeof(uint64) == 8);
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 0 // Disabled in The Witness.
#if NV_CC_MSVC
#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
#else
#define NV_MESSAGE(x) message(x)
#endif
#else
#define NV_MESSAGE(x)
#endif
// Startup initialization macro.
#define NV_AT_STARTUP(some_code) \
namespace { \
static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
} \
NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
}
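// Usage sketch (illustrative, assuming nvDebugPrint from nvcore/debug.h):
// run registration code before main() without an explicit call site.
//
//     NV_AT_STARTUP(nvDebugPrint("module loaded\n"));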
// Tell the compiler that the parameter is not used, to suppress compiler warnings.
#define NV_UNUSED(a) ((a)=(a))
// Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0);
//#define NIL uint(~0)
// Null pointer.
#ifndef NULL
#define NULL 0
#endif
// Platform includes
#if NV_CC_MSVC
# if NV_OS_WIN32
# include "defsvcwin32.h"
# elif NV_OS_XBOX
# include "defsvcxbox.h"
# else
# error "MSVC: Platform not supported"
# endif
#elif NV_CC_GNUC
# if NV_OS_LINUX
# include "defsgnuclinux.h"
# elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
# include "defsgnucdarwin.h"
# elif NV_OS_MINGW
# include "defsgnucwin32.h"
# elif NV_OS_CYGWIN
# error "GCC: Cygwin not supported"
# else
# error "GCC: Platform not supported"
# endif
#endif
#endif // NV_CORE_H

1030
3rdparty/nvtt/nvcore/posh.h vendored Normal file

File diff suppressed because it is too large

459
3rdparty/nvtt/nvcore/stdstream.h vendored Normal file

@@ -0,0 +1,459 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "nvcore.h"
#include "stream.h"
#include "array.h"
#include <stdio.h> // fopen
#include <string.h> // memcpy
namespace nv
{
// Portable version of fopen.
inline FILE * fileOpen(const char * fileName, const char * mode)
{
nvCheck(fileName != NULL);
#if NV_CC_MSVC && _MSC_VER >= 1400
FILE * fp;
if (fopen_s(&fp, fileName, mode) == 0) {
return fp;
}
return NULL;
#else
return fopen(fileName, mode);
#endif
}
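// Usage sketch (illustrative): fileOpen returns NULL on failure on every
// path, so always check the handle before wrapping it.
//
//     FILE * fp = fileOpen("image.dds", "rb");
//     if (fp != NULL) {
//         StdInputStream stream(fp, /*autoclose=*/true);
//         uint32 magic;
//         stream << magic;
//     }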
/// Base stdio stream.
class NVCORE_CLASS StdStream : public Stream
{
NV_FORBID_COPY(StdStream);
public:
/// Ctor.
StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
/// Dtor.
virtual ~StdStream()
{
if( m_fp != NULL && m_autoclose ) {
#if NV_OS_WIN32
_fclose_nolock( m_fp );
#else
fclose( m_fp );
#endif
}
}
/** @name Stream implementation. */
//@{
virtual void seek( uint pos )
{
nvDebugCheck(m_fp != NULL);
nvDebugCheck(pos <= size());
#if NV_OS_WIN32
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
fseek(m_fp, pos, SEEK_SET);
#endif
}
virtual uint tell() const
{
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
return _ftell_nolock(m_fp);
#else
return (uint)ftell(m_fp);
#endif
}
virtual uint size() const
{
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
uint pos = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, 0, SEEK_END);
uint end = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
uint pos = (uint)ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
uint end = (uint)ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
#endif
return end;
}
virtual bool isError() const
{
return m_fp == NULL || ferror( m_fp ) != 0;
}
virtual void clearError()
{
nvDebugCheck(m_fp != NULL);
clearerr(m_fp);
}
// @@ The original implementation used feof, which only returns true once we attempt to read *past* the end of the stream.
// That is, after reading the last byte of a file, isAtEnd would still return false even though the stream pointer was at
// the file end. That was not the intent, and it was inconsistent with the MemoryStream implementation, so this version
// uses ftell and fseek to determine our location within the file.
virtual bool isAtEnd() const
{
if (m_fp == NULL) return true;
//nvDebugCheck(m_fp != NULL);
//return feof( m_fp ) != 0;
#if NV_OS_WIN32
uint pos = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, 0, SEEK_END);
uint end = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
uint pos = (uint)ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
uint end = (uint)ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
#endif
return pos == end;
}
/// Always true.
virtual bool isSeekable() const { return true; }
//@}
protected:
FILE * m_fp;
bool m_autoclose;
};
/// Standard output stream.
class NVCORE_CLASS StdOutputStream : public StdStream
{
NV_FORBID_COPY(StdOutputStream);
public:
/// Construct stream by file name.
StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
/// Construct stream by file handle.
StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
{
}
/** @name Stream implementation. */
//@{
/// Write data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
return (uint)_fwrite_nolock(data, 1, len, m_fp);
#elif NV_OS_LINUX
return (uint)fwrite_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN
// @@ No error checking, always returns len.
for (uint i = 0; i < len; i++) {
putc_unlocked(((char *)data)[i], m_fp);
}
return len;
#else
return (uint)fwrite(data, 1, len, m_fp);
#endif
}
virtual bool isLoading() const
{
return false;
}
virtual bool isSaving() const
{
return true;
}
//@}
};
/// Standard input stream.
class NVCORE_CLASS StdInputStream : public StdStream
{
NV_FORBID_COPY(StdInputStream);
public:
/// Construct stream by file name.
StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
/// Construct stream by file handle.
StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
{
}
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
return (uint)_fread_nolock(data, 1, len, m_fp);
#elif NV_OS_LINUX
return (uint)fread_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN
// @@ No error checking, always returns len.
for (uint i = 0; i < len; i++) {
((char *)data)[i] = getc_unlocked(m_fp);
}
return len;
#else
return (uint)fread(data, 1, len, m_fp);
#endif
}
virtual bool isLoading() const
{
return true;
}
virtual bool isSaving() const
{
return false;
}
//@}
};
/// Memory input stream.
class NVCORE_CLASS MemoryInputStream : public Stream
{
NV_FORBID_COPY(MemoryInputStream);
public:
/// Ctor.
MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(!isError());
uint left = m_size - tell();
if (len > left) len = left;
memcpy( data, m_ptr, len );
m_ptr += len;
return len;
}
virtual void seek( uint pos )
{
nvDebugCheck(!isError());
m_ptr = m_mem + pos;
nvDebugCheck(!isError());
}
virtual uint tell() const
{
nvDebugCheck(m_ptr >= m_mem);
return uint(m_ptr - m_mem);
}
virtual uint size() const
{
return m_size;
}
virtual bool isError() const
{
return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
}
virtual void clearError()
{
// Nothing to do.
}
virtual bool isAtEnd() const
{
return m_ptr == m_mem + m_size;
}
/// Always true.
virtual bool isSeekable() const
{
return true;
}
virtual bool isLoading() const
{
return true;
}
virtual bool isSaving() const
{
return false;
}
//@}
const uint8 * ptr() const { return m_ptr; }
private:
const uint8 * m_mem;
const uint8 * m_ptr;
uint m_size;
};
/// Buffer output stream.
class NVCORE_CLASS BufferOutputStream : public Stream
{
NV_FORBID_COPY(BufferOutputStream);
public:
BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
m_buffer.append((uint8 *)data, len);
return len;
}
virtual void seek( uint /*pos*/ ) { /*Not implemented*/ }
virtual uint tell() const { return m_buffer.size(); }
virtual uint size() const { return m_buffer.size(); }
virtual bool isError() const { return false; }
virtual void clearError() {}
virtual bool isAtEnd() const { return true; }
virtual bool isSeekable() const { return false; }
virtual bool isLoading() const { return false; }
virtual bool isSaving() const { return true; }
private:
Array<uint8> & m_buffer;
};
/// Protected input stream.
class NVCORE_CLASS ProtectedStream : public Stream
{
NV_FORBID_COPY(ProtectedStream);
public:
/// Ctor.
ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false)
{
}
/// Ctor.
ProtectedStream( Stream * s, bool autodelete = true ) :
m_s(s), m_autodelete(autodelete)
{
nvDebugCheck(m_s != NULL);
}
/// Dtor.
virtual ~ProtectedStream()
{
if( m_autodelete ) {
delete m_s;
}
}
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
len = m_s->serialize( data, len );
if( m_s->isError() ) {
throw;
}
return len;
}
virtual void seek( uint pos )
{
m_s->seek( pos );
if( m_s->isError() ) {
throw;
}
}
virtual uint tell() const
{
return m_s->tell();
}
virtual uint size() const
{
return m_s->size();
}
virtual bool isError() const
{
return m_s->isError();
}
virtual void clearError()
{
m_s->clearError();
}
virtual bool isAtEnd() const
{
return m_s->isAtEnd();
}
virtual bool isSeekable() const
{
return m_s->isSeekable();
}
virtual bool isLoading() const
{
return m_s->isLoading();
}
virtual bool isSaving() const
{
return m_s->isSaving();
}
//@}
private:
Stream * const m_s;
bool const m_autodelete;
};
} // nv namespace
//#endif // NV_CORE_STDSTREAM_H

163
3rdparty/nvtt/nvcore/stream.h vendored Normal file

@@ -0,0 +1,163 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_STREAM_H
#define NV_CORE_STREAM_H
#include "nvcore.h"
#include "debug.h"
namespace nv
{
/// Base stream class.
class NVCORE_CLASS Stream {
public:
enum ByteOrder {
LittleEndian = false,
BigEndian = true,
};
/// Get the byte order of the system.
static ByteOrder getSystemByteOrder() {
#if NV_LITTLE_ENDIAN
return LittleEndian;
#else
return BigEndian;
#endif
}
/// Ctor.
Stream() : m_byteOrder(LittleEndian) { }
/// Virtual destructor.
virtual ~Stream() {}
/// Set byte order.
void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
/// Get byte order.
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data.
virtual uint serialize( void * data, uint len ) = 0;
/// Move to the given position in the archive.
virtual void seek( uint pos ) = 0;
/// Return the current position in the archive.
virtual uint tell() const = 0;
/// Return the current size of the archive.
virtual uint size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
/// Clear errors.
virtual void clearError() = 0;
/// Return true if the stream is at the end.
virtual bool isAtEnd() const = 0;
/// Return true if the stream is seekable.
virtual bool isSeekable() const = 0;
/// Return true if this is an input stream.
virtual bool isLoading() const = 0;
/// Return true if this is an output stream.
virtual bool isSaving() const = 0;
void advance(uint offset) { seek(tell() + offset); }
// friends
friend Stream & operator<<( Stream & s, bool & c ) {
#if NV_OS_DARWIN && !NV_CC_CPP11
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
#else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );
#endif
return s;
}
friend Stream & operator<<( Stream & s, char & c ) {
nvStaticCheck(sizeof(char) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint8 & c ) {
nvStaticCheck(sizeof(uint8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, int8 & c ) {
nvStaticCheck(sizeof(int8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint16 & c ) {
nvStaticCheck(sizeof(uint16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, int16 & c ) {
nvStaticCheck(sizeof(int16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, uint32 & c ) {
nvStaticCheck(sizeof(uint32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, int32 & c ) {
nvStaticCheck(sizeof(int32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, uint64 & c ) {
nvStaticCheck(sizeof(uint64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, int64 & c ) {
nvStaticCheck(sizeof(int64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, float & c ) {
nvStaticCheck(sizeof(float) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, double & c ) {
nvStaticCheck(sizeof(double) == 8);
return s.byteOrderSerialize( &c, 8 );
}
protected:
/// Serialize in the stream byte order.
Stream & byteOrderSerialize( void * v, uint len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( uint i = len; i > 0; i-- ) {
serialize( (uint8 *)v + i - 1, 1 );
}
}
return *this;
}
private:
ByteOrder m_byteOrder;
};
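// Usage sketch (illustrative): reading a big-endian file on a little-endian
// machine; byteOrderSerialize swaps bytes only when the two orders differ.
//
//     void loadHeader(Stream & s, uint32 & magic, uint32 & version) {
//         s.setByteOrder(Stream::BigEndian);
//         s << magic << version;
//     }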
} // nv namespace
#endif // NV_CORE_STREAM_H

Some files were not shown because too many files have changed in this diff